Diffstat (limited to 'src/gallium/drivers')
376 files changed, 96244 insertions, 0 deletions
diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile new file mode 100644 index 0000000000..6161cb6ff8 --- /dev/null +++ b/src/gallium/drivers/Makefile @@ -0,0 +1,20 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_DRIVER_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile new file mode 100644 index 0000000000..47aef7b05f --- /dev/null +++ b/src/gallium/drivers/cell/Makefile @@ -0,0 +1,12 @@ +# Cell Gallium driver Makefile + + +default: + ( cd spu ; make ) + ( cd ppu ; make ) + + + +clean: + ( cd spu ; make clean ) + ( cd ppu ; make clean ) diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h new file mode 100644 index 0000000000..1f6860da11 --- /dev/null +++ b/src/gallium/drivers/cell/common.h @@ -0,0 +1,376 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Types and tokens which are common to the SPU and PPU code. + */ + + +#ifndef CELL_COMMON_H +#define CELL_COMMON_H + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" + + +/** The standard assert macro doesn't seem to work reliably */ +#define ASSERT(x) \ + if (!(x)) { \ + ubyte *p = NULL; \ + fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ + __FILE__, __LINE__, __FUNCTION__, #x); \ + *p = 0; \ + exit(1); \ + } + + + +#define JOIN(x, y) JOIN_AGAIN(x, y) +#define JOIN_AGAIN(x, y) x ## y + +#define STATIC_ASSERT(e) \ +{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 
1 : -1];} + + + +/** for sanity checking */ +#define ASSERT_ALIGN16(ptr) \ + ASSERT((((unsigned long) (ptr)) & 0xf) == 0); + + +/** round up value to next multiple of 4 */ +#define ROUNDUP4(k) (((k) + 0x3) & ~0x3) + +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) + +/** round up value to next multiple of 16 */ +#define ROUNDUP16(k) (((k) + 0xf) & ~0xf) + + +#define CELL_MAX_SPUS 8 + +#define CELL_MAX_SAMPLERS 4 +#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ +#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ +#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ +#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */ + +#define TILE_SIZE 32 + + +/** + * The low byte of a mailbox word contains the command opcode. + * Remaining higher bytes are command specific. + */ +#define CELL_CMD_OPCODE_MASK 0xff + +#define CELL_CMD_EXIT 1 +#define CELL_CMD_CLEAR_SURFACE 2 +#define CELL_CMD_FINISH 3 +#define CELL_CMD_RENDER 4 +#define CELL_CMD_BATCH 5 +#define CELL_CMD_RELEASE_VERTS 6 +#define CELL_CMD_STATE_FRAMEBUFFER 10 +#define CELL_CMD_STATE_FRAGMENT_OPS 11 +#define CELL_CMD_STATE_SAMPLER 12 +#define CELL_CMD_STATE_TEXTURE 13 +#define CELL_CMD_STATE_VERTEX_INFO 14 +#define CELL_CMD_STATE_VIEWPORT 15 +#define CELL_CMD_STATE_UNIFORMS 16 +#define CELL_CMD_STATE_VS_ARRAY_INFO 17 +#define CELL_CMD_STATE_BIND_VS 18 +#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 +#define CELL_CMD_STATE_ATTRIB_FETCH 20 +#define CELL_CMD_STATE_FS_CONSTANTS 21 +#define CELL_CMD_STATE_RASTERIZER 22 +#define CELL_CMD_VS_EXECUTE 23 +#define CELL_CMD_FLUSH_BUFFER_RANGE 24 +#define CELL_CMD_FENCE 25 + + +/** Command/batch buffers */ +#define CELL_NUM_BUFFERS 4 +#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ + +#define CELL_BUFFER_STATUS_FREE 10 +#define CELL_BUFFER_STATUS_USED 20 + +/** Debug flags */ +#define CELL_DEBUG_CHECKER (1 << 0) +#define CELL_DEBUG_ASM (1 << 1) +#define CELL_DEBUG_SYNC (1 << 2) +#define CELL_DEBUG_FRAGMENT_OPS (1 << 3) +#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) +#define CELL_DEBUG_CMD (1 << 5) +#define CELL_DEBUG_CACHE (1 << 6) + +#define CELL_FENCE_IDLE 0 +#define CELL_FENCE_EMITTED 1 +#define CELL_FENCE_SIGNALLED 2 + +#define CELL_FACING_FRONT 0 +#define CELL_FACING_BACK 1 + +struct cell_fence +{ + /** There's a 16-byte status qword per SPU */ + volatile uint status[CELL_MAX_SPUS][4]; +}; + +#ifdef __SPU__ +typedef vector unsigned int opcode_t; +#else +typedef unsigned int opcode_t[4]; +#endif + +/** + * Fence command sent to SPUs. In response, the SPUs will write + * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. + */ +struct cell_command_fence +{ + opcode_t opcode; /**< CELL_CMD_FENCE */ + struct cell_fence *fence; + uint32_t pad_[3]; +}; + + +/** + * Command to specify per-fragment operations state and generated code. + * Note that this is a variant-length structure, allocated with as + * much memory as needed to hold the generated code; the "code" + * field *must* be the last field in the structure. Also, the entire + * length of the structure (including the variant code field) must be + * a multiple of 8 bytes; we require that this structure itself be + * a multiple of 8 bytes, and that the generated code also be a multiple + * of 8 bytes. + * + * Also note that the dsa, blend, blend_color fields are really only needed + * for the fallback/C per-pixel code. 
They're not used when we generate + * dynamic SPU fragment code (which is the normal case), and will eventually + * be removed from this structure. + */ +struct cell_command_fragment_ops +{ + opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + + /* Fields for the fallback case */ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + + /* Fields for the generated SPU code */ + unsigned total_code_size; + unsigned front_code_index; + unsigned back_code_index; + /* this field has variant length, and must be the last field in + * the structure + */ + unsigned code[0]; +}; + + +/** Max instructions for fragment programs */ +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 + +/** + * Command to send a fragment program to SPUs. + */ +struct cell_command_fragment_program +{ + opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ + uint num_inst; /**< Number of instructions */ + uint32_t pad[3]; + unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; +}; + + +/** + * Tell SPUs about the framebuffer size, location + */ +struct cell_command_framebuffer +{ + opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ + int width, height; + void *color_start, *depth_start; + enum pipe_format color_format, depth_format; + uint32_t pad_[2]; +}; + + +/** + * Tell SPUs about rasterizer state. + */ +struct cell_command_rasterizer +{ + opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ + struct pipe_rasterizer_state rasterizer; +}; + + +/** + * Clear framebuffer to the given value/color. + */ +struct cell_command_clear_surface +{ + opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ + uint surface; /**< Temporary: 0=color, 1=Z */ + uint value; + uint32_t pad[2]; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. 
*/ + uint size; + uint function_offset; +}; + + +struct cell_attribute_fetch_code +{ + uint64_t base; + uint size; +}; + + +struct cell_buffer_range +{ + uint64_t base; + unsigned size; +}; + + +struct cell_shader_info +{ + uint64_t declarations; + uint64_t instructions; + uint64_t immediates; + + unsigned num_outputs; + unsigned num_declarations; + unsigned num_instructions; + unsigned num_immediates; +}; + + +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ + opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */ + uint64_t vOut[SPU_VERTS_PER_BATCH]; + unsigned num_elts; + unsigned elts[SPU_VERTS_PER_BATCH]; + float plane[12][4]; + unsigned nr_planes; + unsigned nr_attrs; +}; + + +struct cell_command_render +{ + opcode_t opcode; /**< CELL_CMD_RENDER */ + uint prim_type; /**< PIPE_PRIM_x */ + uint num_verts; + uint vertex_size; /**< bytes per vertex */ + uint num_indexes; + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ + float xmin, ymin, xmax, ymax; /* XXX another dummy field */ + uint min_index; + boolean inline_verts; + uint32_t pad_[1]; +}; + + +struct cell_command_release_verts +{ + opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ + uint32_t pad_[3]; +}; + + +struct cell_command_sampler +{ + opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */ + uint unit; + struct pipe_sampler_state state; + uint32_t pad_[1]; +}; + + +struct cell_command_texture +{ + opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + uint target; /**< PIPE_TEXTURE_x */ + uint unit; + void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ + ushort width[CELL_MAX_TEXTURE_LEVELS]; + ushort height[CELL_MAX_TEXTURE_LEVELS]; + ushort depth[CELL_MAX_TEXTURE_LEVELS]; +}; + + +#define MAX_SPU_FUNCTIONS 12 +/** + * Used to tell the PPU about the address of particular functions in the + * SPU's address space. + */ +struct cell_spu_function_info +{ + uint num; + char names[MAX_SPU_FUNCTIONS][16]; + uint addrs[MAX_SPU_FUNCTIONS]; + char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ +}; + + +/** This is the object passed to spe_create_thread() */ +struct cell_init_info +{ + unsigned id; + unsigned num_spus; + unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ + float inv_timebase; /**< 1.0/timebase, for perf measurement */ + + /** Buffers for command batches, vertex/index data */ + ubyte *buffers[CELL_NUM_BUFFERS]; + uint *buffer_status; /**< points at cell_context->buffer_status */ + + struct cell_spu_function_info *spu_functions; +} ALIGN16_ATTRIB; + + +#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile new file mode 100644 index 0000000000..4d6ac0d266 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -0,0 +1,87 @@ +# Gallium3D Cell driver: PPU code + +# This makefile builds the libcell.a library which gets pulled into +# the main libGL.so library + + +TOP = ../../../../.. +include $(TOP)/configs/current + + +# This is the "top-level" cell PPU driver code, will get pulled into libGL.so +# by the winsys Makefile. +CELL_LIB = ../libcell.a + + +# This is the SPU code. We'd like to be able to put this into the libcell.a +# archive with the PPU code, but nesting .a libs doesn't seem to work. 
+# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile +SPU_CODE_MODULE = ../spu/g3d_spu.a + + +SOURCES = \ + cell_batch.c \ + cell_clear.c \ + cell_context.c \ + cell_draw_arrays.c \ + cell_fence.c \ + cell_flush.c \ + cell_gen_fragment.c \ + cell_gen_fp.c \ + cell_state_derived.c \ + cell_state_emit.c \ + cell_state_shader.c \ + cell_pipe_state.c \ + cell_screen.c \ + cell_state_vertex.c \ + cell_spu.c \ + cell_surface.c \ + cell_texture.c \ + cell_vbuf.c \ + cell_vertex_fetch.c \ + cell_vertex_shader.c \ + cell_winsys.c + + +OBJECTS = $(SOURCES:.c=.o) \ + +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +.c.s: + $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: $(CELL_LIB) + + +$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work + ar -ru $(CELL_LIB) $(OBJECTS) + +#$(PROG): $(PPU_OBJECTS) +# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) + + + +clean: + rm -f *.o *~ $(CELL_LIB) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + + + diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c new file mode 100644 index 0000000000..fe144f8b84 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -0,0 +1,260 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_spu.h" + + + +/** + * Search the buffer pool for an empty/free buffer and return its index. + * Buffers are used for storing vertex data, state and commands which + * will be sent to the SPUs. + * If no empty buffers are available, wait for one. 
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1] + */ +uint +cell_get_empty_buffer(struct cell_context *cell) +{ + static uint prev_buffer = 0; + uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; + uint tries = 0; + + /* Find a buffer that's marked as free by all SPUs */ + while (1) { + uint spu, num_free = 0; + + for (spu = 0; spu < cell->num_spus; spu++) { + if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { + num_free++; + + if (num_free == cell->num_spus) { + /* found a free buffer, now mark status as used */ + for (spu = 0; spu < cell->num_spus; spu++) { + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + } + /* + printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); + */ + prev_buffer = buf; + + /* release tex buffer associated w/ prev use of this batch buf */ + cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); + + return buf; + } + } + else { + break; + } + } + + /* try next buf */ + buf = (buf + 1) % CELL_NUM_BUFFERS; + + tries++; + if (tries == 100) { + /* + printf("PPU WAITING for buffer...\n"); + */ + } + } +} + + +/** + * Append a fence command to the current batch buffer. + * Note that we're sure there's always room for this because of the + * adjusted size check in cell_batch_free_space(). + */ +static void +emit_fence(struct cell_context *cell) +{ + const uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + struct cell_command_fence *fence_cmd; + struct cell_fence *fence = &cell->fenced_buffers[batch].fence; + uint i; + + /* set fence status to emitted, not yet signalled */ + for (i = 0; i < cell->num_spus; i++) { + fence->status[i][0] = CELL_FENCE_EMITTED; + } + + STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); + ASSERT(size % 16 == 0); + ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); + + fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); + fence_cmd->opcode[0] = CELL_CMD_FENCE; + fence_cmd->fence = fence; + + /* update batch buffer size */ + cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); +} + + +/** + * Flush the current batch buffer to the SPUs. + * An empty buffer will be found and set as the new current batch buffer + * for subsequent commands/data. + */ +void +cell_batch_flush(struct cell_context *cell) +{ + static boolean flushing = FALSE; + uint batch = cell->cur_batch; + uint size = cell->buffer_size[batch]; + uint spu, cmd_word; + + assert(!flushing); + + if (size == 0) + return; + + /* Before we use this batch buffer, make sure any fenced texture buffers + * are released. + */ + if (cell->fenced_buffers[batch].head) { + emit_fence(cell); + size = cell->buffer_size[batch]; + } + + flushing = TRUE; + + assert(batch < CELL_NUM_BUFFERS); + + /* + printf("cell_batch_dispatch: buf %u at %p, size %u\n", + batch, &cell->buffer[batch][0], size); + */ + + /* + * Build "BATCH" command and send to all SPUs. + */ + cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); + + for (spu = 0; spu < cell->num_spus; spu++) { + assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); + send_mbox_message(cell_global.spe_contexts[spu], cmd_word); + } + + /* When the SPUs are done copying the buffer into their locals stores + * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] + * array indicating that the PPU can re-use the buffer. 
+ */ + + batch = cell_get_empty_buffer(cell); + + cell->buffer_size[batch] = 0; /* empty */ + cell->cur_batch = batch; + + flushing = FALSE; +} + + +/** + * Return the number of bytes free in the current batch buffer. + */ +uint +cell_batch_free_space(const struct cell_context *cell) +{ + uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + free -= sizeof(struct cell_command_fence); + return free; +} + + +/** + * Allocate space in the current batch buffer for 'bytes' space. + * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned. + * \return address in batch buffer to put data + */ +void * +cell_batch_alloc16(struct cell_context *cell, uint bytes) +{ + void *pos; + uint size; + + ASSERT(bytes % 16 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + if (bytes > cell_batch_free_space(cell)) { + cell_batch_flush(cell); + size = 0; + } + + ASSERT(size % 16 == 0); + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + pos = (void *) (cell->buffer[cell->cur_batch] + size); + + cell->buffer_size[cell->cur_batch] = size + bytes; + + return pos; +} + + +/** + * One-time init of batch buffers. + */ +void +cell_init_batch_buffers(struct cell_context *cell) +{ + uint spu, buf; + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h new file mode 100644 index 0000000000..290136031a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_BATCH_H +#define CELL_BATCH_H + +#include "pipe/p_compiler.h" + + +struct cell_context; + + +extern uint +cell_get_empty_buffer(struct cell_context *cell); + +extern void +cell_batch_flush(struct cell_context *cell); + +extern uint +cell_batch_free_space(const struct cell_context *cell); + +extern void * +cell_batch_alloc16(struct cell_context *cell, uint bytes); + +extern void +cell_init_batch_buffers(struct cell_context *cell); + + +#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c new file mode 100644 index 0000000000..c2e276988c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + +#include <stdio.h> +#include <assert.h> +#include <stdint.h> +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "cell/common.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_state.h" + + +/** + * Convert packed pixel from one format to another. 
+ */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** + * Called via pipe->clear() + */ +void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct pipe_screen *screen = pipe->screen; + struct cell_context *cell = cell_context(pipe); + uint surfIndex; + + if (cell->dirty) + cell_update_derived(cell); + + + if (!cell->cbuf_map[0]) + cell->cbuf_map[0] = screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_GPU_WRITE); + + if (ps == cell->framebuffer.zsbuf) { + /* clear z/stencil buffer */ + surfIndex = 1; + } + else { + /* clear color buffer */ + surfIndex = 0; + + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } + } + + + /* Build a CLEAR command and place it in the current batch buffer */ + { + STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) + cell_batch_alloc16(cell, sizeof(*clr)); + clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; + clr->surface = surfIndex; + clr->value = clearValue; + } + + /* Technically, the surface's contents are now known and cleared, + * so we could set the status to PIPE_SURFACE_STATUS_CLEAR. But + * it turns out it's quite painful to recognize when any particular + * surface goes from PIPE_SURFACE_STATUS_CLEAR to + * PIPE_SURFACE_STATUS_DEFINED (i.e. with known contents), because + * the drawing commands could be operating on numerous draw buffers, + * which we'd have to iterate through to set all their stati... + * For now, we cheat a bit and set the surface's status to DEFINED + * right here. Later we should revisit this and set the status to + * CLEAR here, and find a better place to set the status to DEFINED. + */ + ps->status = PIPE_SURFACE_STATUS_DEFINED; +} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h new file mode 100644 index 0000000000..ff47d43f4c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CLEAR_H +#define CELL_CLEAR_H + + +struct pipe_context; +struct pipe_surface; + + +extern void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + + +#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c new file mode 100644 index 0000000000..8f502823f9 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include <stdio.h> + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" + +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +#include "cell/common.h" +#include "cell_batch.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_fence.h" +#include "cell_flush.h" +#include "cell_state.h" +#include "cell_surface.h" +#include "cell_spu.h" +#include "cell_pipe_state.h" +#include "cell_texture.h" +#include "cell_vbuf.h" + + + +static void +cell_destroy_context( struct pipe_context *pipe ) +{ + struct cell_context *cell = cell_context(pipe); + + util_delete_keymap(cell->fragment_ops_cache, NULL); + + cell_spu_exit(cell); + + align_free(cell); +} + + +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ + struct draw_context *draw = draw_create(); + +#if 0 /* broken */ + if (getenv("GALLIUM_CELL_VS")) { + /* plug in SPU-based vertex transformation code */ + draw->shader_queue_flush = cell_vertex_shader_queue_flush; + draw->driver_private = cell; + } +#endif + + return draw; +} + + +static const struct debug_named_value cell_debug_flags[] = { + {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ + {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ + {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ + {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */ + {NULL, 0} +}; + + +struct pipe_context * +cell_create_context(struct pipe_screen *screen, + struct cell_winsys *cws) +{ + struct cell_context *cell; + uint i; + + /* some fields need to be 16-byte aligned, so align the whole object */ + cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); + if (!cell) + return NULL; + + memset(cell, 0, sizeof(*cell)); + + cell->winsys = cws; + cell->pipe.winsys = screen->winsys; + cell->pipe.screen = screen; + cell->pipe.destroy = cell_destroy_context; + + cell->pipe.clear = cell_clear_surface; + cell->pipe.flush = cell_flush; + +#if 0 + cell->pipe.begin_query = cell_begin_query; + cell->pipe.end_query = cell_end_query; + cell->pipe.wait_query = cell_wait_query; +#endif + + cell_init_draw_functions(cell); + cell_init_state_functions(cell); + cell_init_shader_functions(cell); + cell_init_surface_functions(cell); + cell_init_vertex_functions(cell); + + cell->draw = cell_draw_create(cell); + + /* Create cache of fragment ops generated code */ + cell->fragment_ops_cache = + util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); + + cell_init_vbuf(cell); + + draw_set_rasterize_stage(cell->draw, cell->vbuf); + + /* convert all points/lines to tris for the time being */ + draw_wide_point_threshold(cell->draw, 0.0); + draw_wide_line_threshold(cell->draw, 0.0); + + /* get env vars or read config file to get debug flags */ + cell->debug_flags = debug_get_flags_option("CELL_DEBUG", + cell_debug_flags, + 0 ); + + for (i = 0; i < CELL_NUM_BUFFERS; i++) + cell_fence_init(&cell->fenced_buffers[i].fence); + + + /* + * SPU stuff + */ + /* This call only works with SDK 3.0. 
Anyone still using 2.1??? */ + cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); + cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); + if (cell->debug_flags) { + printf("Cell: found %d Cell(s) with %u SPUs\n", + cell->num_cells, cell->num_spus); + } + if (getenv("CELL_NUM_SPUS")) { + cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); + assert(cell->num_spus > 0); + } + + cell_start_spus(cell); + + cell_init_batch_buffers(cell); + + /* make sure SPU initializations are done before proceeding */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + + return &cell->pipe; +} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h new file mode 100644 index 0000000000..eb1397bb3f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -0,0 +1,205 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CONTEXT_H +#define CELL_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" +#include "cell_winsys.h" +#include "cell/common.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_keymap.h" + + +struct cell_vbuf_render; + + +/** + * Cell vertex shader state, subclass of pipe_shader_state. + */ +struct cell_vertex_shader_state +{ + struct pipe_shader_state shader; + struct tgsi_shader_info info; + void *draw_data; +}; + + +/** + * Cell fragment shader state, subclass of pipe_shader_state. + */ +struct cell_fragment_shader_state +{ + struct pipe_shader_state shader; + struct tgsi_shader_info info; + struct spe_function code; + void *data; +}; + + +/** + * Key for mapping per-fragment state to cached SPU machine code. + * keymap(cell_fragment_ops_key) => cell_command_fragment_ops + */ +struct cell_fragment_ops_key +{ + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_depth_stencil_alpha_state dsa; + enum pipe_format color_format; + enum pipe_format zs_format; +}; + + +struct cell_buffer_node; + +/** + * Fenced buffer list. List of buffers which can be unreferenced after + * the fence has been executed/signalled. 
+ */ +struct cell_buffer_list +{ + struct cell_fence fence ALIGN16_ATTRIB; + struct cell_buffer_node *head; +}; + + +/** + * Per-context state, subclass of pipe_context. + */ +struct cell_context +{ + struct pipe_context pipe; + + struct cell_winsys *winsys; + + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + uint num_samplers; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct cell_vertex_shader_state *vs; + const struct cell_fragment_shader_state *fs; + + struct spe_function logic_op; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + uint num_textures; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + uint num_vertex_buffers; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + uint num_vertex_elements; + + ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; + ubyte *zsbuf_map; + + struct pipe_surface *tex_surf; + uint *tex_map; + + uint dirty; + uint dirty_textures; /* bitmask of texture units */ + uint dirty_samplers; /* bitmask of sampler units */ + + /** Cache of code generated for per-fragment ops */ + struct keymap *fragment_ops_cache; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *render_stage; + + /** For post-transformed vertex buffering: */ + struct cell_vbuf_render *vbuf_render; + struct draw_stage *vbuf; + + struct vertex_info vertex_info; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + + uint num_cells, num_spus; + + /** Buffers for command batches, vertex/index data */ + uint buffer_size[CELL_NUM_BUFFERS]; + ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + + int cur_batch; /**< which buffer is being filled w/ commands */ + + /** [4] to ensure 16-byte alignment for each status word */ + uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + + + /** Associated with each command/batch buffer is a list of pipe_buffers + * that are fenced. When the last command in a buffer is executed, the + * fence will be signalled, indicating that any pipe_buffers preceeding + * that fence can be unreferenced (and probably freed). 
+ */ + struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + + struct spe_function attrib_fetch; + unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; + + unsigned debug_flags; +}; + + + + +static INLINE struct cell_context * +cell_context(struct pipe_context *pipe) +{ + return (struct cell_context *) pipe; +} + + +extern struct pipe_context * +cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws); + +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); + + +/* XXX find a better home for this */ +extern void cell_update_vertex_fetch(struct draw_context *draw); + + +#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c new file mode 100644 index 0000000000..880d535320 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_state.h" +#include "cell_flush.h" + +#include "draw/draw_context.h" + + + +static void +cell_map_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) { + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, sp->mapped_constants[i], + sp->constants[i].buffer->size); + } + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); +} + +static void +cell_unmap_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + + +/** + * Draw vertex arrays, with optional indexing. 
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +static boolean +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *sp = cell_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + if (sp->dirty) + cell_update_derived( sp ); + +#if 0 + cell_map_surfaces(sp); +#endif + cell_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! */ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe_buffer_unmap(pipe->screen, indexBuffer); + } + + /* Note: leave drawing surfaces mapped */ + cell_unmap_constant_buffers(sp); + + return TRUE; +} + + +static boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + +static boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + + +static void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct cell_context *cell = cell_context(pipe); + draw_set_edgeflags(cell->draw, edgeflags); +} + + + +void +cell_init_draw_functions(struct cell_context *cell) +{ + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h new file mode 100644 index 0000000000..148873aa67 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_DRAW_ARRAYS_H +#define CELL_DRAW_ARRAYS_H + + +extern void +cell_init_draw_functions(struct cell_context *cell); + + +#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..867b5dcaa0 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,168 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <unistd.h> +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ + uint i; + ASSERT_ALIGN16(fence->status); + for (i = 0; i < CELL_MAX_SPUS; i++) { + fence->status[i][0] = CELL_FENCE_IDLE; + } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence) +{ + uint i; + for (i = 0; i < cell->num_spus; i++) { + if (fence->status[i][0] != CELL_FENCE_SIGNALLED) + return FALSE; + /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ + } + return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence) +{ + while (!cell_fence_signalled(cell, fence)) { + usleep(10); + } + +#ifdef DEBUG + { + uint i; + for (i = 0; i < cell->num_spus; i++) { + assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); + } + } +#endif +} + + + + +struct cell_buffer_node +{ + struct pipe_buffer *buffer; + struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, + struct cell_buffer_list *list, + struct pipe_buffer *buffer) +{ + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); + /* create new list node which references the buffer, insert at head */ + if (node) { + pipe_buffer_reference(ps, &node->buffer, buffer); + node->next = list->head; + list->head = node; + } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. + * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. + */ +void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list) +{ + if (list->head) { + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node; + + cell_fence_finish(cell, &list->fence); + + /* traverse the list, unreferencing buffers, freeing nodes */ + node = list->head; + while (node) { + struct cell_buffer_node *next = node->next; + assert(node->buffer); + pipe_buffer_unmap(ps, node->buffer); +#if 0 + printf("Unref buffer %p\n", node->buffer); + if (node->buffer->refcount == 1) + printf(" Delete!\n"); +#endif + pipe_buffer_reference(ps, &node->buffer, NULL); + FREE(node); + node = next; + } + list->head = NULL; + } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are current bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. 
+ */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ + struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + uint i; + + for (i = 0; i < cell->num_textures; i++) { + struct cell_texture *ct = cell->texture[i]; + if (ct) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + if (ct->tiled_buffer[level]) { +#if 0 + printf("Adding texture %p buffer %p to list\n", + ct, ct->tiled_buffer[level]); +#endif + cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); + } + } + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c new file mode 100644 index 0000000000..8275c9dc9c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_render.h" +#include "draw/draw_context.h" + + +/** + * Called via pipe->flush() + */ +void +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct cell_context *cell = cell_context(pipe); + + if (fence) { + *fence = NULL; + /* XXX: Implement real fencing */ + flags |= CELL_FLUSH_WAIT; + } + + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE)) + flags |= CELL_FLUSH_WAIT; + + draw_flush( cell->draw ); + cell_flush_int(cell, flags); +} + + +/** + * Cell internal flush function. Send the current batch buffer to all SPUs. + * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. 
+ * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero + */ +void +cell_flush_int(struct cell_context *cell, unsigned flags) +{ + static boolean flushing = FALSE; /* recursion catcher */ + uint i; + + ASSERT(!flushing); + flushing = TRUE; + + if (flags & CELL_FLUSH_WAIT) { + STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); + opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); + *cmd[0] = CELL_CMD_FINISH; + } + + cell_batch_flush(cell); + +#if 0 + /* Send CMD_FINISH to all SPUs */ + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); + } +#endif + + if (flags & CELL_FLUSH_WAIT) { + /* Wait for ack */ + for (i = 0; i < cell->num_spus; i++) { + uint k = wait_mbox_message(cell_global.spe_contexts[i]); + assert(k == CELL_CMD_FINISH); + } + } + + flushing = FALSE; +} + + +void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size) +{ + STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0); + uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, + sizeof(opcode_t) + sizeof(struct cell_buffer_range)); + struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4]; + batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; + br->base = (uintptr_t) ptr; + br->size = size; +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h new file mode 100644 index 0000000000..509ae6239a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_FLUSH +#define CELL_FLUSH + +#define CELL_FLUSH_WAIT 0x80000000 + +extern void +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); + +extern void +cell_flush_int(struct cell_context *cell, unsigned flags); + +extern void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c new file mode 100644 index 0000000000..5a889a6119 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -0,0 +1,2046 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU fragment program/shader code. + * + * Note that we generate SOA-style code here. So each TGSI instruction + * operates on four pixels (and is translated into four SPU instructions, + * generally speaking). + * + * \author Brian Paul + */ + +#include <math.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fp.h" + + +#define MAX_TEMPS 16 +#define MAX_IMMED 8 + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +/** + * Context needed during code generation. 
+ */ +struct codegen +{ + struct cell_context *cell; + int inputs_reg; /**< 1st function parameter */ + int outputs_reg; /**< 2nd function parameter */ + int constants_reg; /**< 3rd function parameter */ + int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ + int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ + + int num_imm; /**< number of immediates */ + + int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ + + int addr_reg; /**< address register, integer values */ + + /** Per-instruction temps / intermediate temps */ + int num_itemps; + int itemps[12]; + + /** Current IF/ELSE/ENDIF nesting level */ + int if_nesting; + /** Current BGNLOOP/ENDLOOP nesting level */ + int loop_nesting; + /** Location of start of current loop */ + int loop_start; + + /** Index of if/conditional mask register */ + int cond_mask_reg; + /** Index of loop mask register */ + int loop_mask_reg; + + /** Index of master execution mask register */ + int exec_mask_reg; + + /** KIL mask: indicates which fragments have been killed */ + int kill_mask_reg; + + int frame_size; /**< Stack frame size, in words */ + + struct spe_function *f; + boolean error; +}; + + +/** + * Allocate an intermediate temporary register. + */ +static int +get_itemp(struct codegen *gen) +{ + int t = spe_allocate_available_register(gen->f); + assert(gen->num_itemps < Elements(gen->itemps)); + gen->itemps[gen->num_itemps++] = t; + return t; +} + +/** + * Free all intermediate temporary registers. To be called after each + * instruction has been emitted. + */ +static void +free_itemps(struct codegen *gen) +{ + int i; + for (i = 0; i < gen->num_itemps; i++) { + spe_release_register(gen->f, gen->itemps[i]); + } + gen->num_itemps = 0; +} + + +/** + * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. + * The register is allocated and initialized upon the first call. + */ +static int +get_const_one_reg(struct codegen *gen) +{ + if (gen->one_reg <= 0) { + gen->one_reg = spe_allocate_available_register(gen->f); + + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "init constant reg = 1.0:"); + + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(gen->f, gen->one_reg, 1.0f); + + spe_indent(gen->f, -4); + } + + return gen->one_reg; +} + + +/** + * Return index of the address register. + * Used for indirect register loads/stores. + */ +static int +get_address_reg(struct codegen *gen) +{ + if (gen->addr_reg <= 0) { + gen->addr_reg = spe_allocate_available_register(gen->f); + + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "init address reg = 0:"); + + /* init addr = {0, 0, 0, 0} */ + spe_zero(gen->f, gen->addr_reg); + + spe_indent(gen->f, -4); + } + + return gen->addr_reg; +} + + +/** + * Return index of the master execution mask. + * The register is allocated an initialized upon the first call. + * + * The master execution mask controls which pixels in a quad are + * modified, according to surrounding conditionals, loops, etc. 
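+ * For example, a write to a TGSI temporary inside an IF block becomes a
+ * select, d = selb(d, new_value, exec_mask) (see store_dest_reg), so only
+ * the fragments whose mask word is ~0 actually receive the new value.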
+ */ +static int +get_exec_mask_reg(struct codegen *gen) +{ + if (gen->exec_mask_reg <= 0) { + gen->exec_mask_reg = spe_allocate_available_register(gen->f); + + /* XXX this may not be needed */ + spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); + spe_load_int(gen->f, gen->exec_mask_reg, ~0); + } + + return gen->exec_mask_reg; +} + + +/** Return index of the conditional (if/else) execution mask register */ +static int +get_cond_mask_reg(struct codegen *gen) +{ + if (gen->cond_mask_reg <= 0) { + gen->cond_mask_reg = spe_allocate_available_register(gen->f); + } + + return gen->cond_mask_reg; +} + + +/** Return index of the loop execution mask register */ +static int +get_loop_mask_reg(struct codegen *gen) +{ + if (gen->loop_mask_reg <= 0) { + gen->loop_mask_reg = spe_allocate_available_register(gen->f); + } + + return gen->loop_mask_reg; +} + + + +static boolean +is_register_src(struct codegen *gen, int channel, + const struct tgsi_full_src_register *src) +{ + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + + if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { + return FALSE; + } + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || + src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + return TRUE; + } + return FALSE; +} + + +static boolean +is_memory_dst(struct codegen *gen, int channel, + const struct tgsi_full_dst_register *dst) +{ + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + return TRUE; + } + else { + return FALSE; + } +} + + +/** + * Return the index of the SPU temporary containing the named TGSI + * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we + * just return the corresponding SPE register. If the TGIS register + * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register + * and emit an SPE load instruction. + */ +static int +get_src_reg(struct codegen *gen, + int channel, + const struct tgsi_full_src_register *src) +{ + int reg = -1; + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + boolean reg_is_itemp = FALSE; + uint sign_op; + + assert(swizzle >= TGSI_SWIZZLE_X); + assert(swizzle <= TGSI_EXTSWIZZLE_ONE); + + if (swizzle == TGSI_EXTSWIZZLE_ONE) { + /* Load const one float and early out */ + reg = get_const_one_reg(gen); + } + else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { + /* Load const zero float and early out */ + reg = get_itemp(gen); + spe_xor(gen->f, reg, reg, reg); + } + else { + int index = src->SrcRegister.Index; + + assert(swizzle < 4); + + if (src->SrcRegister.Indirect) { + /* XXX unfinished */ + } + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + reg = gen->temp_regs[index][swizzle]; + break; + case TGSI_FILE_INPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); + } + break; + case TGSI_FILE_IMMEDIATE: + reg = gen->imm_regs[index][swizzle]; + break; + case TGSI_FILE_CONSTANT: + { + /* offset is measured in quadwords, not bytes */ + int offset = index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); + } + break; + default: + assert(0); + } + } + + /* + * Handle absolute value, negate or set-negative of src register. 
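+ * (In TGSI terms: SIGN_CLEAR yields |x|, SIGN_TOGGLE yields -x and
+ * SIGN_SET yields -|x|; each case is handled with one logical op on the
+ * 0x80000000 sign-bit mask loaded below.)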
+ */ + sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + /* + * All sign ops are done by manipulating bit 31, the IEEE float sign bit. + */ + const int bit31mask_reg = get_itemp(gen); + int result_reg; + + if (reg_is_itemp) { + /* re-use 'reg' for the result */ + result_reg = reg; + } + else { + /* alloc a new reg for the result */ + result_reg = get_itemp(gen); + } + + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + + if (sign_op == TGSI_UTIL_SIGN_CLEAR) { + spe_andc(gen->f, result_reg, reg, bit31mask_reg); + } + else if (sign_op == TGSI_UTIL_SIGN_SET) { + spe_and(gen->f, result_reg, reg, bit31mask_reg); + } + else { + assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); + spe_xor(gen->f, result_reg, reg, bit31mask_reg); + } + + reg = result_reg; + } + + return reg; +} + + +/** + * Return the index of an SPE register to use for the given TGSI register. + * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the + * corresponding SPE register is returned. If the TGSI register is + * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. + * See store_dest_reg() below... + */ +static int +get_dst_reg(struct codegen *gen, + int channel, + const struct tgsi_full_dst_register *dest) +{ + int reg = -1; + + switch (dest->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + if (gen->if_nesting > 0 || gen->loop_nesting > 0) + reg = get_itemp(gen); + else + reg = gen->temp_regs[dest->DstRegister.Index][channel]; + break; + case TGSI_FILE_OUTPUT: + reg = get_itemp(gen); + break; + default: + assert(0); + } + + return reg; +} + + +/** + * When a TGSI instruction is writing to an output register, this + * function emits the SPE store instruction to store the value_reg. + * \param value_reg the SPE register containing the value to store. + * This would have been returned by get_dst_reg(). + */ +static void +store_dest_reg(struct codegen *gen, + int value_reg, int channel, + const struct tgsi_full_dst_register *dest) +{ + /* + * XXX need to implement dst reg clamping/saturation + */ +#if 0 + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + break; + case TGSI_SAT_MINUS_PLUS_ONE: + break; + default: + assert( 0 ); + } +#endif + + switch (dest->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + if (gen->if_nesting > 0 || gen->loop_nesting > 0) { + int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int exec_reg = get_exec_mask_reg(gen); + /* Mix d with new value according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); + } + else { + /* we're not inside a condition or loop: do nothing special */ + + } + break; + case TGSI_FILE_OUTPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = dest->DstRegister.Index * 4 + channel; + if (gen->if_nesting > 0 || gen->loop_nesting > 0) { + int exec_reg = get_exec_mask_reg(gen); + int curval_reg = get_itemp(gen); + /* First read the current value from memory: + * Load: curval = memory[(machine_reg) + offset] + */ + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); + /* Mix curval with newvalue according to exec mask: + * d[i] = mask_reg[i] ? 
value_reg : d_reg + */ + spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); + /* Store: memory[(machine_reg) + offset] = curval */ + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); + } + else { + /* Store: memory[(machine_reg) + offset] = reg */ + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); + } + } + break; + default: + assert(0); + } +} + + + +static void +emit_prologue(struct codegen *gen) +{ + gen->frame_size = 1024; /* XXX temporary, should be dynamic */ + + spe_comment(gen->f, 0, "Function prologue:"); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + if (gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + int sp_reg = spe_allocate_available_register(gen->f); + /* offset = -framesize */ + spe_load_int(gen->f, offset_reg, -gen->frame_size); + /* sp = $sp */ + spe_move(gen->f, sp_reg, SPE_REG_SP); + /* $sp = $sp + offset_reg */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* save $sp in stack frame */ + spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); + /* clean up */ + spe_release_register(gen->f, offset_reg); + spe_release_register(gen->f, sp_reg); + } + else { + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + } +} + + +static void +emit_epilogue(struct codegen *gen) +{ + const int return_reg = 3; + + spe_comment(gen->f, 0, "Function epilogue:"); + + spe_comment(gen->f, 0, "return the killed mask"); + if (gen->kill_mask_reg > 0) { + /* shader called KIL, return the "alive" mask */ + spe_move(gen->f, return_reg, gen->kill_mask_reg); + } + else { + /* return {0,0,0,0} */ + spe_load_uint(gen->f, return_reg, 0); + } + + spe_comment(gen->f, 0, "restore stack and return"); + if (gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + /* offset = framesize */ + spe_load_int(gen->f, offset_reg, gen->frame_size); + /* $sp = $sp + offset */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* clean up */ + spe_release_register(gen->f, offset_reg); + } + else { + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + } + + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + +#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ + for (ch = 0; ch < 4; ch++) \ + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + + +static boolean +emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch = 0, src_reg, addr_reg; + + src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + addr_reg = get_address_reg(gen); + + /* convert float to int */ + spe_cflts(gen->f, addr_reg, src_reg, 0); + + free_itemps(gen); + + return TRUE; +} + + +static boolean +emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, src_reg[4], dst_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && + is_memory_dst(gen, ch, 
&inst->FullDstRegisters[0])) { + /* special-case: register to memory store */ + store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + } + else { + spe_move(gen->f, dst_reg[ch], src_reg[ch]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + + return TRUE; +} + +/** + * Emit binary operation + */ +static boolean +emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4]; + + /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* Emit actual SPE instruction: d = s1 + s2 */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SUB: + spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_MUL: + spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + default: + ; + } + } + + /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + /* Free any intermediate temps we allocated */ + free_itemps(gen); + + return TRUE; +} + + +/** + * Emit multiply add. See emit_ADD for comments. + */ +static boolean +emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + free_itemps(gen); + return TRUE; +} + + +/** + * Emit linear interpolate. See emit_ADD for comments. + */ +static boolean +emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; + + /* setup/get src/dst/temp regs */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* d = s3 + s1(s2 - s3) */ + /* do all subtracts, then all fma, then all stores to better pipeline */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + free_itemps(gen); + return TRUE; +} + + + +/** + * Emit reciprocal or recip sqrt. 
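+ * frest/frsqest only produce a rough estimate of 1/x or 1/sqrt(x); the
+ * following fi (floating interpolate) instruction refines the estimate
+ * to nearly full single precision, so each channel is estimate + refine.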
+ */ +static boolean +emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4], tmp_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { + /* tmp = 1/s1 */ + spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); + } + else { + /* tmp = 1/sqrt(s1) */ + spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); + } + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* d = float_interp(s1, tmp) */ + spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit absolute value. See emit_ADD for comments. + */ +static boolean +emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4]; + const int bit31mask_reg = get_itemp(gen); + + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + /* d = sign bit cleared in s1 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit 3 component dot product. See emit_ADD for comments. + */ +static boolean +emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s1x_reg, s1y_reg, s1z_reg; + int s2x_reg, s2y_reg, s2z_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); + + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); + + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit 4 component dot product. See emit_ADD for comments. 
+ */ +static boolean +emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s0x_reg, s0y_reg, s0z_reg, s0w_reg; + int s1x_reg, s1y_reg, s1z_reg, s1w_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); + + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); + + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); + + /* t1 = w0 * w1 + t1 */ + spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit homogeneous dot product. See emit_ADD for comments. + */ +static boolean +emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + /* XXX rewrite this function to look more like DP3/DP4 */ + int ch; + int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = x0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = y0 * y1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = z0 * z1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + /* t = w1 + t */ + spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit 3-component vector normalize. 
+ */ +static boolean +emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int src_reg[3]; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + + /* t0 = x * x */ + spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); + + /* t1 = y * y */ + spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); + + /* t0 = z * z + t0 */ + spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + /* t1 = 1.0 / sqrt(t0) */ + spe_frsqest(gen->f, t1_reg, t0_reg); + spe_fi(gen->f, t1_reg, t0_reg, t1_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* dst = src[ch] * t1 */ + spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit cross product. See emit_ADD for comments. + */ +static boolean +emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = z0 * y1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = y0 * z1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = x0 * z1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = z0 * x1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = y0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = x0 * y1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit inequality instruction. + * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as + * the result but OpenGL/TGSI needs 0.0 and 1.0 results. + * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. 
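+ * For example SGE: fcgt computes (s2 > s1) as 0x0/0xffffffff, then
+ * andc(1.0, result) leaves 1.0 exactly where that comparison is false,
+ * i.e. where s1 >= s2.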
+ */ +static boolean +emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; + bool complement = FALSE; + + one_reg = get_const_one_reg(gen); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SGT: + spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SLT: + spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + break; + case TGSI_OPCODE_SGE: + spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + complement = TRUE; + break; + case TGSI_OPCODE_SLE: + spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + complement = TRUE; + break; + case TGSI_OPCODE_SEQ: + spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SNE: + spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + complement = TRUE; + break; + default: + assert(0); + } + } + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* d = d & one_reg */ + if (complement) + spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); + else + spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit compare. + */ +static boolean +emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int zero_reg = get_itemp(gen); + + spe_zero(gen->f, zero_reg); + + /* d = (s1 < 0) ? s2 : s3 */ + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + + return TRUE; +} + +/** + * Emit trunc. + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); + } + + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit floor. 
+ * If negative int subtract one + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; + + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + one_reg = get_const_one_reg(gen); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* If negative, subtract 1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } + + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Compute frac = Input - FLR(Input) + */ +static boolean +emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; + + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + one_reg = get_const_one_reg(gen); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* If negative, subtract 1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } + + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } + + /* d = s1 - FLR(s1) */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + /* store result */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +#if 0 +static void +print_functions(struct cell_context *cell) +{ + struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i; + for (i = 0; i < funcs->num; i++) { + printf("SPU func %u: %s at %u\n", + i, funcs->names[i], funcs->addrs[i]); + } +} +#endif + + +static uint +lookup_function(struct cell_context *cell, const char *funcname) +{ + const struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i, addr = 0; + for (i = 0; i < funcs->num; i++) { + if (strcmp(funcs->names[i], funcname) == 0) { + addr = funcs->addrs[i]; + } + } + assert(addr && "spu function not found"); + return addr / 4; /* discard 2 least significant bits */ +} + + +/** + * Emit code to call a SPU function. + * Used to implement instructions like SIN/COS/POW/TEX/etc. 
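+ * The call sequence below is: spill the live SPE registers into the
+ * stack frame, move the operands into the argument registers ($3, $4, ...),
+ * brasl to the function, copy its result out of $3, then reload the
+ * spilled registers.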
+ * If scalar, only the X components of the src regs are used, and the + * result is replicated across the dest register's XYZW components. + */ +static boolean +emit_function_call(struct codegen *gen, + const struct tgsi_full_instruction *inst, + char *funcname, uint num_args, boolean scalar) +{ + const uint addr = lookup_function(gen->cell, funcname); + char comment[100]; + int s_regs[3]; + int func_called = FALSE; + uint a, ch; + int retval_reg = -1; + + assert(num_args <= 3); + + snprintf(comment, sizeof(comment), "CALL %s:", funcname); + spe_comment(gen->f, -4, comment); + + if (scalar) { + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]); + } + /* we'll call the function, put the return value in this register, + * then replicate it across all write-enabled components in d_reg. + */ + retval_reg = spe_allocate_available_register(gen->f); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + if (!scalar) { + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + } + } + + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + if (!scalar || !func_called) { + /* for a scalar function, we'll really only call the function once */ + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return value */ + if (scalar) + spe_move(gen->f, retval_reg, 3); + else + spe_move(gen->f, d_reg, 3); + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg && reg != retval_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + + func_called = TRUE; + } + + if (scalar) { + spe_move(gen->f, d_reg, retval_reg); + } + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + + if (scalar) { + spe_release_register(gen->f, retval_reg); + } + + return TRUE; +} + + +static boolean +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const uint target = inst->InstructionExtTexture.Texture; + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint addr; + int ch; + int coord_regs[4], d_regs[4]; + + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_2D: + addr = lookup_function(gen->cell, "spu_tex_2d"); + break; + case TGSI_TEXTURE_3D: + addr = lookup_function(gen->cell, "spu_tex_3d"); + break; + case TGSI_TEXTURE_CUBE: + addr = lookup_function(gen->cell, "spu_tex_cube"); + break; + default: + ASSERT(0 && "unsupported texture target"); + return FALSE; + } + + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + + spe_comment(gen->f, -4, "CALL tex:"); + + /* get src/dst reg info */ + for (ch = 0; ch < 4; ch++) { + coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + { + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); + + /* save registers to stack */ 
+ for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments (XXX depends on target) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, 3 + i, coord_regs[i]); + } + spe_load_uint(gen->f, 7, unit); /* sampler unit */ + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return values (four pixel's colors) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, d_regs[i], 3 + i); + } + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_regs[0] && + reg != d_regs[1] && + reg != d_regs[2] && + reg != d_regs[3]) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + + return TRUE; +} + + +/** + * KILL if any of src reg values are less than zero. + */ +static boolean +emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; + + spe_comment(gen->f, -4, "CALL kil:"); + + /* zero = {0,0,0,0} */ + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + + cmp_reg = get_itemp(gen); + + /* get src regs */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + } + + /* test if any src regs are < 0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (kil_reg >= 0) { + /* cmp = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); + /* kil = kil | cmp */ + spe_or(gen->f, kil_reg, kil_reg, cmp_reg); + } + else { + kil_reg = get_itemp(gen); + /* kil = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); + } + } + + if (gen->if_nesting || gen->loop_nesting) { + /* may have been a conditional kil */ + spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); + } + + /* allocate the kill mask reg if needed */ + if (gen->kill_mask_reg <= 0) { + gen->kill_mask_reg = spe_allocate_available_register(gen->f); + spe_move(gen->f, gen->kill_mask_reg, kil_reg); + } + else { + spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); + } + + free_itemps(gen); + + return TRUE; +} + + + +/** + * Emit min or max. + */ +static boolean +emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* d = (s0 > s1) ? s0 : s1 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) + spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); + else + spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit code to update the execution mask. + * This needs to be done whenever the execution status of a conditional + * or loop is changed. 
+ */ +static void +emit_update_exec_mask(struct codegen *gen) +{ + const int exec_reg = get_exec_mask_reg(gen); + const int cond_reg = gen->cond_mask_reg; + const int loop_reg = gen->loop_mask_reg; + + spe_comment(gen->f, 0, "Update master execution mask"); + + if (gen->if_nesting > 0 && gen->loop_nesting > 0) { + /* exec_mask = cond_mask & loop_mask */ + assert(cond_reg > 0); + assert(loop_reg > 0); + spe_and(gen->f, exec_reg, cond_reg, loop_reg); + } + else if (gen->if_nesting > 0) { + assert(cond_reg > 0); + spe_move(gen->f, exec_reg, cond_reg); + } + else if (gen->loop_nesting > 0) { + assert(loop_reg > 0); + spe_move(gen->f, exec_reg, loop_reg); + } + else { + spe_load_int(gen->f, exec_reg, ~0x0); + } +} + + +static boolean +emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int channel = 0; + int cond_reg; + + cond_reg = get_cond_mask_reg(gen); + + /* XXX push cond exec mask */ + + spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); + spe_load_int(gen->f, cond_reg, ~0); + + /* update conditional execution mask with the predicate register */ + int tmp_reg = get_itemp(gen); + int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + + /* tmp = (s1_reg == 0) */ + spe_ceqi(gen->f, tmp_reg, s1_reg, 0); + /* tmp = !tmp */ + spe_complement(gen->f, tmp_reg, tmp_reg); + /* cond_mask = cond_mask & tmp */ + spe_and(gen->f, cond_reg, cond_reg, tmp_reg); + + gen->if_nesting++; + + /* update the master execution mask */ + emit_update_exec_mask(gen); + + free_itemps(gen); + + return TRUE; +} + + +static boolean +emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int cond_reg = get_cond_mask_reg(gen); + + spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); + spe_complement(gen->f, cond_reg, cond_reg); + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + /* XXX todo: pop cond exec mask */ + + gen->if_nesting--; + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int exec_reg, loop_reg; + + exec_reg = get_exec_mask_reg(gen); + loop_reg = get_loop_mask_reg(gen); + + /* XXX push loop_exec mask */ + + spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); + spe_load_int(gen->f, loop_reg, ~0x0); + + gen->loop_nesting++; + gen->loop_start = spe_code_size(gen->f); /* in bytes */ + + return TRUE; +} + + +static boolean +emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int loop_reg = get_loop_mask_reg(gen); + const int tmp_reg = get_itemp(gen); + int offset; + + /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ + spe_orx(gen->f, tmp_reg, loop_reg); + + offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ + + /* branch back to top of loop if tmp_reg != 0 */ + spe_brnz(gen->f, tmp_reg, offset / 4); + + /* XXX pop loop_exec mask */ + + gen->loop_nesting--; + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + const int loop_reg = get_loop_mask_reg(gen); + + assert(gen->loop_nesting > 0); + + spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask"); + spe_andc(gen->f, loop_reg, loop_reg, exec_reg); + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_CONT(struct codegen *gen, const struct 
tgsi_full_instruction *inst) +{ + assert(gen->loop_nesting > 0); + + return TRUE; +} + + +static boolean +emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, + boolean ddx) +{ + int ch; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + int t1_reg = get_itemp(gen); + int t2_reg = get_itemp(gen); + + spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ + if (ddx) { + spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ + } + else { + spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ + } + spe_fs(gen->f, d_reg, t2_reg, t1_reg); + + free_itemps(gen); + } + + return TRUE; +} + + + + +/** + * Emit END instruction. + * We just return from the shader function at this point. + * + * Note that there may be more code after this that would be + * called by TGSI_OPCODE_CALL. + */ +static boolean +emit_END(struct codegen *gen) +{ + emit_epilogue(gen); + return TRUE; +} + + +/** + * Emit code for the given instruction. Just a big switch stmt. + */ +static boolean +emit_instruction(struct codegen *gen, + const struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + return emit_ARL(gen, inst); + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + return emit_MOV(gen, inst); + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + return emit_binop(gen, inst); + case TGSI_OPCODE_MAD: + return emit_MAD(gen, inst); + case TGSI_OPCODE_LERP: + return emit_LERP(gen, inst); + case TGSI_OPCODE_DP3: + return emit_DP3(gen, inst); + case TGSI_OPCODE_DP4: + return emit_DP4(gen, inst); + case TGSI_OPCODE_DPH: + return emit_DPH(gen, inst); + case TGSI_OPCODE_NRM: + return emit_NRM3(gen, inst); + case TGSI_OPCODE_XPD: + return emit_XPD(gen, inst); + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + return emit_RCP_RSQ(gen, inst); + case TGSI_OPCODE_ABS: + return emit_ABS(gen, inst); + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + return emit_inequality(gen, inst); + case TGSI_OPCODE_CMP: + return emit_CMP(gen, inst); + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + return emit_MIN_MAX(gen, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(gen, inst); + case TGSI_OPCODE_FLR: + return emit_FLR(gen, inst); + case TGSI_OPCODE_FRC: + return emit_FRC(gen, inst); + case TGSI_OPCODE_END: + return emit_END(gen); + + case TGSI_OPCODE_COS: + return emit_function_call(gen, inst, "spu_cos", 1, TRUE); + case TGSI_OPCODE_SIN: + return emit_function_call(gen, inst, "spu_sin", 1, TRUE); + case TGSI_OPCODE_POW: + return emit_function_call(gen, inst, "spu_pow", 2, TRUE); + case TGSI_OPCODE_EXPBASE2: + return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); + case TGSI_OPCODE_LOGBASE2: + return emit_function_call(gen, inst, "spu_log2", 1, TRUE); + case TGSI_OPCODE_TEX: + /* fall-through for now */ + case TGSI_OPCODE_TXD: + /* fall-through for now */ + case TGSI_OPCODE_TXB: + /* fall-through for now */ + case TGSI_OPCODE_TXL: + /* fall-through for now */ + case TGSI_OPCODE_TXP: + return emit_TEX(gen, inst); + case TGSI_OPCODE_KIL: + return emit_KIL(gen, inst); + + case TGSI_OPCODE_IF: + return emit_IF(gen, inst); + case TGSI_OPCODE_ELSE: + return emit_ELSE(gen, inst); + case TGSI_OPCODE_ENDIF: + return emit_ENDIF(gen, inst); + + case TGSI_OPCODE_BGNLOOP2: + return emit_BGNLOOP(gen, inst); + case 
TGSI_OPCODE_ENDLOOP2: + return emit_ENDLOOP(gen, inst); + case TGSI_OPCODE_BRK: + return emit_BRK(gen, inst); + case TGSI_OPCODE_CONT: + return emit_CONT(gen, inst); + + case TGSI_OPCODE_DDX: + return emit_DDX_DDY(gen, inst, TRUE); + case TGSI_OPCODE_DDY: + return emit_DDX_DDY(gen, inst, FALSE); + + /* XXX lots more cases to do... */ + + default: + fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", + inst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + + + +/** + * Emit code for a TGSI immediate value (vector of four floats). + * This involves register allocation and initialization. + * XXX the initialization should be done by a "prepare" stage, not + * per quad execution! + */ +static boolean +emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) +{ + int ch; + + assert(gen->num_imm < MAX_TEMPS); + + for (ch = 0; ch < 4; ch++) { + float val = immed->u.ImmediateFloat32[ch].Float; + + if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) { + /* re-use previous register */ + gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; + } + else { + char str[100]; + int reg = spe_allocate_available_register(gen->f); + + if (reg < 0) + return FALSE; + + sprintf(str, "init $%d = %f", reg, val); + spe_comment(gen->f, 0, str); + + /* update immediate map */ + gen->imm_regs[gen->num_imm][ch] = reg; + + /* emit initializer instruction */ + spe_load_float(gen->f, reg, val); + } + } + + gen->num_imm++; + + return TRUE; +} + + + +/** + * Emit "code" for a TGSI declaration. + * We only care about TGSI TEMPORARY register declarations at this time. + * For each TGSI TEMPORARY we allocate four SPE registers. + */ +static boolean +emit_declaration(struct cell_context *cell, + struct codegen *gen, const struct tgsi_full_declaration *decl) +{ + int i, ch; + + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; + i++) { + assert(i < MAX_TEMPS); + for (ch = 0; ch < 4; ch++) { + gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); + if (gen->temp_regs[i][ch] < 0) + return FALSE; /* out of regs */ + } + + /* XXX if we run out of SPE registers, we need to spill + * to SPU memory. someday... + */ + + { + char buf[100]; + sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, + gen->temp_regs[i][0], gen->temp_regs[i][1], + gen->temp_regs[i][2], gen->temp_regs[i][3]); + spe_comment(gen->f, 0, buf); + } + } + break; + default: + ; /* ignore */ + } + + return TRUE; +} + + + +/** + * Translate TGSI shader code to SPE instructions. This is done when + * the state tracker gives us a new shader (via pipe->create_fs_state()). + * + * \param cell the rendering context (in) + * \param tokens the TGSI shader (in) + * \param f the generated function (out) + */ +boolean +cell_gen_fragment_program(struct cell_context *cell, + const struct tgsi_token *tokens, + struct spe_function *f) +{ + struct tgsi_parse_context parse; + struct codegen gen; + uint ic = 0; + + memset(&gen, 0, sizeof(gen)); + gen.cell = cell; + gen.f = f; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
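+    * So the generated code behaves, roughly, like a function with the
+    * (hypothetical) signature
+    *   vector unsigned int shader(qword *inputs, qword *outputs, qword *consts);
+    * taking the three array pointers in $3/$4/$5 and returning the kill
+    * mask in $3 (see emit_epilogue).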
*/ + gen.inputs_reg = 3; /* pointer to inputs array */ + gen.outputs_reg = 4; /* pointer to outputs array */ + gen.constants_reg = 5; /* pointer to constants array */ + + spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); + spe_allocate_register(f, gen.inputs_reg); + spe_allocate_register(f, gen.outputs_reg); + spe_allocate_register(f, gen.constants_reg); + + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, TRUE); + spe_indent(f, 2*8); + printf("Begin %s\n", __FUNCTION__); + tgsi_dump(tokens, 0); + } + + tgsi_parse_init(&parse, tokens); + + emit_prologue(&gen); + + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (f->print) { + _debug_printf(" # "); + tgsi_dump_immediate(&parse.FullToken.FullImmediate); + } + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + gen.error = TRUE; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (f->print) { + _debug_printf(" # "); + tgsi_dump_declaration(&parse.FullToken.FullDeclaration); + } + if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) + gen.error = TRUE; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (f->print) { + _debug_printf(" # "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); + } + if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) + gen.error = TRUE; + break; + + default: + assert(0); + } + } + + if (gen.error) { + /* terminate the SPE code */ + return emit_END(&gen); + } + + if (cell->debug_flags & CELL_DEBUG_ASM) { + printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); + printf("End %s\n", __FUNCTION__); + } + + tgsi_parse_free( &parse ); + + return !gen.error; +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h new file mode 100644 index 0000000000..99faea7046 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + + +#ifndef CELL_GEN_FP_H +#define CELL_GEN_FP_H + + + +extern boolean +cell_gen_fragment_program(struct cell_context *cell, + const struct tgsi_token *tokens, + struct spe_function *f); + + +#endif /* CELL_GEN_FP_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c new file mode 100644 index 0000000000..0ea8f017ef --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -0,0 +1,2181 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generate SPU per-fragment code (actually per-quad code). + * \author Brian Paul + * \author Bob Ellison + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" + + + +/** Do extra optimizations? */ +#define OPTIMIZATIONS 1 + + +/** + * Generate SPE code to perform Z/depth testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param ifragZ_reg register containing integer fragment Z values (in) + * \param ifbZ_reg register containing integer frame buffer Z values (in/out) + * \param zmask_reg register containing result of Z test/comparison (out) + * + * Returns TRUE if the Z-buffer needs to be updated. + */ +static boolean +gen_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) +{ + /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ + * quantities. This only makes a difference for 32-bit Z values though. 
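+ * (Hypothetical example: a 32-bit Z value of 0x80000000 looks negative to
+ * the signed cgt instruction and would sort below 0x00000001, while clgt
+ * treats both operands as unsigned and orders them correctly; 16- and
+ * 24-bit Z values never set the sign bit, so either instruction would do.)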
+ */ + ASSERT(dsa->depth.enabled); + + switch (dsa->depth.func) { + case PIPE_FUNC_EQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GREATER: + /* zmask = (ifragZ > ref) */ + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LESS: + /* zmask = (ref > ifragZ) */ + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* zmask = (ifragZ > ref) */ + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* zmask = (ref > ifragZ) */ + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ + spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* mask unchanged */ + spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ + break; + + default: + ASSERT(0); + break; + } + + if (dsa->depth.writemask) { + /* + * If (ztest passed) { + * framebufferZ = fragmentZ; + * } + * OR, + * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; + */ + spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + return TRUE; + } + + return FALSE; +} + + +/** + * Generate SPE code to perform alpha testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. 
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param fragA_reg register containing four fragment alpha values (in) + */ +static void +gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask_reg, int fragA_reg) +{ + int ref_reg = spe_allocate_available_register(f); + int amask_reg = spe_allocate_available_register(f); + + ASSERT(dsa->alpha.enabled); + + if ((dsa->alpha.func != PIPE_FUNC_NEVER) && + (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + /* load/splat the alpha reference float value */ + spe_load_float(f, ref_reg, dsa->alpha.ref); + } + + /* emit code to do the alpha comparison, updating 'mask' */ + switch (dsa->alpha.func) { + case PIPE_FUNC_EQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GREATER: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LESS: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ + break; + + case PIPE_FUNC_ALWAYS: + /* no-op, mask unchanged */ + break; + + default: + ASSERT(0); + break; + } + +#if OPTIMIZATIONS + /* if mask == {0,0,0,0} we're all done, return */ + { + /* re-use amask reg here */ + int tmp_reg = amask_reg; + /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ + spe_orx(f, tmp_reg, mask_reg); + /* if tmp[0] == 0 then return from function call */ + spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); + } +#endif + + spe_release_register(f, ref_reg); + spe_release_register(f, amask_reg); +} + + +/** + * This pair of functions is used inline to allocate and deallocate + * optional constant registers. Once a constant is discovered to be + * needed, we will likely need it again, so we don't want to deallocate + * it and have to allocate and load it again unnecessarily. + */ +static INLINE void +setup_optional_register(struct spe_function *f, + int *r) +{ + if (*r < 0) + *r = spe_allocate_available_register(f); +} + +static INLINE void +release_optional_register(struct spe_function *f, + int r) +{ + if (r >= 0) + spe_release_register(f, r); +} + +static INLINE void +setup_const_register(struct spe_function *f, + int *r, + float value) +{ + if (*r >= 0) + return; + setup_optional_register(f, r); + spe_load_float(f, *r, value); +} + +static INLINE void +release_const_register(struct spe_function *f, + int r) +{ + release_optional_register(f, r); +} + + + +/** + * Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. 
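+ * (Worked example for PIPE_FORMAT_A8R8G8B8_UNORM: a packed pixel 0x80FF4020
+ * is masked/rotated into A=0x80, R=0xFF, G=0x40, B=0x20 below, and each of
+ * those integers is then converted to a float with spe_cuflt().)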
+ */ +static void +unpack_colors(struct spe_function *f, + enum pipe_format color_format, + int fbRGBA_reg, + int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) +{ + int mask0_reg = spe_allocate_available_register(f); + int mask1_reg = spe_allocate_available_register(f); + int mask2_reg = spe_allocate_available_register(f); + int mask3_reg = spe_allocate_available_register(f); + + spe_load_int(f, mask0_reg, 0xff); + spe_load_int(f, mask1_reg, 0xff00); + spe_load_int(f, mask2_reg, 0xff0000); + spe_load_int(f, mask3_reg, 0xff000000); + + spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); + + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); + + /* fbG = fbG >> 8 */ + spe_roti(f, fbG_reg, fbG_reg, -8); + + /* fbR = fbR >> 16 */ + spe_roti(f, fbR_reg, fbR_reg, -16); + + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); + + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + + /* fbG = fbG >> 16 */ + spe_roti(f, fbG_reg, fbG_reg, -16); + + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask0_reg); + spe_release_register(f, mask1_reg); + spe_release_register(f, mask2_reg); + spe_release_register(f, mask3_reg); +} + + +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f SPE function to append instruction onto. 
+ * \param fragR_reg register with fragment red values (float) (in/out) + * \param fragG_reg register with fragment green values (float) (in/out) + * \param fragB_reg register with fragment blue values (float) (in/out) + * \param fragA_reg register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, + const struct pipe_blend_color *blend_color, + struct spe_function *f, + enum pipe_format color_format, + int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, + int fbRGBA_reg) +{ + int term1R_reg = spe_allocate_available_register(f); + int term1G_reg = spe_allocate_available_register(f); + int term1B_reg = spe_allocate_available_register(f); + int term1A_reg = spe_allocate_available_register(f); + + int term2R_reg = spe_allocate_available_register(f); + int term2G_reg = spe_allocate_available_register(f); + int term2B_reg = spe_allocate_available_register(f); + int term2A_reg = spe_allocate_available_register(f); + + int fbR_reg = spe_allocate_available_register(f); + int fbG_reg = spe_allocate_available_register(f); + int fbB_reg = spe_allocate_available_register(f); + int fbA_reg = spe_allocate_available_register(f); + + int tmp_reg = spe_allocate_available_register(f); + + /* Optional constant registers we might or might not end up using; + * if we do use them, make sure we only allocate them once by + * keeping a flag on each one. + */ + int one_reg = -1; + int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; + + ASSERT(blend->blend_enable); + + /* packed RGBA -> float colors */ + unpack_colors(f, color_format, fbRGBA_reg, + fbR_reg, fbG_reg, fbB_reg, fbA_reg); + + /* + * Compute Src RGB terms. We're actually looking for the value + * of (the appropriate RGB factors) * (the incoming source RGB color), + * because in some cases (like PIPE_BLENDFACTOR_ONE and + * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. 
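+ * (For instance, PIPE_BLENDFACTOR_INV_SRC_ALPHA needs R*(1-A); instead of
+ * computing 1-A and then multiplying, the code below folds both steps into
+ * a single fnms instruction: fnms(term, R, A, R) = R - R*A = R*(1-A).)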
+ */ + switch (blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (R,G,B) */ + spe_move(f, term1R_reg, fragR_reg); + spe_move(f, term1G_reg, fragG_reg); + spe_move(f, term1B_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term1R_reg, 0.0f); + spe_load_float(f, term1G_reg, 0.0f); + spe_load_float(f, term1B_reg, 0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ + spe_fm(f, term1R_reg, fragR_reg, fragR_reg); + spe_fm(f, term1G_reg, fragG_reg, fragG_reg); + spe_fm(f, term1B_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ + spe_fm(f, term1R_reg, fragR_reg, fragA_reg); + spe_fm(f, term1G_reg, fragG_reg, fragA_reg); + spe_fm(f, term1B_reg, fragB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) + * or in other words term = (R-R*R, G-G*G, B-B*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term1R_reg, fragR_reg, fbR_reg); + spe_fm(f, term1G_reg, fragG_reg, fbG_reg); + spe_fm(f, term1B_reg, fragB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) + * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) + * or term = (R-R*A,G-G*A,B-B*A) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ + spe_fm(f, term1R_reg, fragR_reg, fbA_reg); + spe_fm(f, term1G_reg, fragG_reg, fbA_reg); + spe_fm(f, term1B_reg, fragB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) + * or term = (R-R*Afb,G-G*Afb,b-B*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ + spe_fm(f, term1R_reg, fragR_reg, constR_reg); + spe_fm(f, term1G_reg, fragG_reg, constG_reg); + spe_fm(f, term1B_reg, fragB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, 
&constA_reg, blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ + spe_fm(f, term1R_reg, fragR_reg, constA_reg); + spe_fm(f, term1G_reg, fragG_reg, constA_reg); + spe_fm(f, term1B_reg, fragB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) + * or term = (R-R*Rc, G-G*Gc, B-B*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) + * or term = (R-R*Ac,G-G*Ac,B-B*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* We'll need the optional {1,1,1,1} register */ + setup_const_register(f, &one_reg, 1.0f); + /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so + * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + * We could expand the term (as a*min(b,c) == min(a*b,a*c) + * as long as a is positive), but then we'd have to do three + * spe_float_min() functions instead of one, so this is simpler. + */ + /* tmp = 1 - Afb */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* tmp = min(A,tmp) */ + spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); + /* term = R*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term. Like the above, we're looking for + * the full term A*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. 
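+ * (For the alpha channel the COLOR and ALPHA variants of each factor have
+ * the same value, which is why several cases below share code via
+ * fall-throughs, and SRC_ALPHA_SATURATE degenerates to a factor of one.)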
+ */ + switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term1A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ + case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = A */ + spe_move(f, term1A_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = A*A */ + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = A*(1-A) = A-A*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = A*Afb */ + spe_fm(f, term1A_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = A*Ac */ + spe_fm(f, term1A_reg, fragA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + ASSERT(0); + } + + /* + * Compute Dest RGB term. Like the above, we're looking for + * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. 
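+ * (The same folding applies here with the framebuffer color as the value
+ * being scaled; e.g. PIPE_BLENDFACTOR_INV_SRC_ALPHA becomes one fnms per
+ * channel: Rfb - Rfb*A = Rfb*(1-A).)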
+ */ + switch (blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ + spe_move(f, term2R_reg, fbR_reg); + spe_move(f, term2G_reg, fbG_reg); + spe_move(f, term2B_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + /* factor s= (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term2R_reg, 0.0f); + spe_load_float(f, term2G_reg, 0.0f); + spe_load_float(f, term2B_reg, 0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fragR_reg); + spe_fm(f, term2G_reg, fbG_reg, fragG_reg); + spe_fm(f, term2B_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) + * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ + spe_fm(f, term2R_reg, fbR_reg, fragA_reg); + spe_fm(f, term2G_reg, fbG_reg, fragA_reg); + spe_fm(f, term2B_reg, fbB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fbR_reg); + spe_fm(f, term2G_reg, fbG_reg, fbG_reg); + spe_fm(f, term2B_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) + * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ + spe_fm(f, term2R_reg, fbR_reg, fbA_reg); + spe_fm(f, term2G_reg, fbG_reg, fbA_reg); + spe_fm(f, term2B_reg, fbB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) + * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ + spe_fm(f, term2R_reg, fbR_reg, constR_reg); + spe_fm(f, term2G_reg, fbG_reg, constG_reg); + spe_fm(f, term2B_reg, fbB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg, 
blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ + spe_fm(f, term2R_reg, fbR_reg, constA_reg); + spe_fm(f, term2G_reg, fbG_reg, constA_reg); + spe_fm(f, term2B_reg, fbB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) + * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) + * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term. Like the above, we're looking for + * the full term Afb*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. 
+ */ + switch (blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = Afb */ + spe_move(f, term2A_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term2A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = Afb*A */ + spe_fm(f, term2A_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = Afb*Afb */ + spe_fm(f, term2A_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = Afb*Ac */ + spe_fm(f, term2A_reg, fbA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms as per the blend equation. 
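+ * (E.g. PIPE_BLEND_ADD computes fragRGB = term1 + term2
+ * = srcRGB*srcFactor + dstRGB*dstFactor with one spe_fa() per channel;
+ * REVERSE_SUBTRACT simply swaps the operands of the subtract.)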
+ */ + switch (blend->rgb_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragR_reg, term1R_reg, term2R_reg); + spe_fa(f, fragG_reg, term1G_reg, term2G_reg); + spe_fa(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragR_reg, term1R_reg, term2R_reg); + spe_fs(f, fragG_reg, term1G_reg, term2G_reg); + spe_fs(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragR_reg, term2R_reg, term1R_reg); + spe_fs(f, fragG_reg, term2G_reg, term1G_reg); + spe_fs(f, fragB_reg, term2B_reg, term1B_reg); + break; + case PIPE_BLEND_MIN: + spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_MAX: + spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); + break; + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (blend->alpha_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragA_reg, term2A_reg, term1A_reg); + break; + case PIPE_BLEND_MIN: + spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_MAX: + spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); + break; + default: + ASSERT(0); + } + + spe_release_register(f, term1R_reg); + spe_release_register(f, term1G_reg); + spe_release_register(f, term1B_reg); + spe_release_register(f, term1A_reg); + + spe_release_register(f, term2R_reg); + spe_release_register(f, term2G_reg); + spe_release_register(f, term2B_reg); + spe_release_register(f, term2A_reg); + + spe_release_register(f, fbR_reg); + spe_release_register(f, fbG_reg); + spe_release_register(f, fbB_reg); + spe_release_register(f, fbA_reg); + + spe_release_register(f, tmp_reg); + + /* Free any optional registers that actually got used */ + release_const_register(f, one_reg); + release_const_register(f, constR_reg); + release_const_register(f, constG_reg); + release_const_register(f, constB_reg); + release_const_register(f, constA_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. 
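+ * Because the logic ops are bitwise, they can be applied to the packed
+ * words directly; e.g. PIPE_LOGICOP_XOR below is a single spe_xor() over
+ * the whole quad, independent of the color component layout.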
+ * */ + ASSERT(blend->logicop_enable); + + switch(blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: /* 0 */ + spe_zero(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOR: /* ~(s | d) */ + spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ + spe_complement(f, fragRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_INVERT: /* ~d */ + /* Note that (A nor A) == ~(A|A) == ~A */ + spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_XOR: /* s ^ d */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_NAND: /* ~(s & d) */ + spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND: /* s & d */ + spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOOP: /* d */ + spe_move(f, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY: /* s */ + break; + case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR: /* s | d */ + spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_SET: /* 1 */ + spe_load_int(f, fragRGBA_reg, 0xffffffff); + break; + default: + ASSERT(0); + } +} + + +/** + * Generate code to pack a quad of float colors into four 32-bit integers. + * + * \param f SPE function to append instruction onto. + * \param color_format the dest color packing format + * \param r_reg register containing four red values (in/clobbered) + * \param g_reg register containing four green values (in/clobbered) + * \param b_reg register containing four blue values (in/clobbered) + * \param a_reg register containing four alpha values (in/clobbered) + * \param rgba_reg register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, + enum pipe_format color_format, + int r_reg, int g_reg, int b_reg, int a_reg, + int rgba_reg) +{ + int rg_reg = spe_allocate_available_register(f); + int ba_reg = spe_allocate_available_register(f); + + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, r_reg, r_reg, 32); + spe_cfltu(f, g_reg, g_reg, 32); + spe_cfltu(f, b_reg, b_reg, 32); + spe_cfltu(f, a_reg, a_reg, 32); + + /* Shift the most significant bytes to the least significant positions. 
+ * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, r_reg, r_reg, -24); + spe_rotmi(f, g_reg, g_reg, -24); + spe_rotmi(f, b_reg, b_reg, -24); + spe_rotmi(f, a_reg, a_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ + spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ + spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ + spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ + spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. + * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rg_reg, r_reg, g_reg); + spe_or(f, ba_reg, a_reg, b_reg); + spe_or(f, rgba_reg, rg_reg, ba_reg); + + spe_release_register(f, rg_reg); + spe_release_register(f, ba_reg); +} + + +static void +gen_colormask(struct spe_function *f, + uint colormask, + enum pipe_format color_format, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. Further, the pixels + * are packed according to the given color format, not + * necessarily RGBA... + */ + uint r_mask; + uint g_mask; + uint b_mask; + uint a_mask; + + /* Calculate exactly where the bits for any particular color + * end up, so we can mask them correctly. + */ + switch(color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* ARGB */ + a_mask = 0xff000000; + r_mask = 0x00ff0000; + g_mask = 0x0000ff00; + b_mask = 0x000000ff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* BGRA */ + b_mask = 0xff000000; + g_mask = 0x00ff0000; + r_mask = 0x0000ff00; + a_mask = 0x000000ff; + break; + default: + ASSERT(0); + } + + /* For each R, G, B, and A component we're supposed to mask out, + * clear its bits. Then our mask operation later will work + * as expected. + */ + if (!(colormask & PIPE_MASK_R)) { + r_mask = 0; + } + if (!(colormask & PIPE_MASK_G)) { + g_mask = 0; + } + if (!(colormask & PIPE_MASK_B)) { + b_mask = 0; + } + if (!(colormask & PIPE_MASK_A)) { + a_mask = 0; + } + + /* Get a temporary register to hold the mask that will be applied + * to the fragment + */ + int colormask_reg = spe_allocate_available_register(f); + + /* The actual mask we're going to use is an OR of the remaining R, G, B, + * and A masks. Load the result value into our temporary register. + */ + spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); + + /* Use the mask register to select between the fragment color + * values and the frame buffer color values. Wherever the + * mask has a 0 bit, the current frame buffer color should override + * the fragment color. Wherever the mask has a 1 bit, the + * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) + * instruction will select bits from its first operand rA wherever the + * the mask bits rM are 0, and from its second operand rB wherever the + * mask bits rM are 1. 
That means that the frame buffer color is the + * first operand, and the fragment color the second. + */ + spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + + /* Release the temporary register and we're done */ + spe_release_register(f, colormask_reg); +} + + +/** + * This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value. As such, we have + * access to the Compare Immediate instructions where we don't in + * gen_depth_test(), which is what makes us very different. + * + * There's some added complexity if there's a non-trivial state->mask + * value; then stencil and reference both must be masked + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test. The bitmask of valid + * fragments that failed would be found in + * (fragment_mask_reg & ~stencil_pass_reg). + */ +static void +gen_stencil_test(struct spe_function *f, + const struct pipe_stencil_state *state, + uint stencil_max_value, + int fragment_mask_reg, + int fbS_reg, + int stencil_pass_reg) +{ + /* Generate code that puts the set of passing fragments into the + * stencil_pass_reg register, taking into account whether each fragment + * was active to begin with. + */ + switch (state->func) { + case PIPE_FUNC_EQUAL: + if (state->value_mask == stencil_max_value) { + /* stencil_pass = fragment_mask & (s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ + uint tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask); + spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, + state->value_mask & state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_NOTEQUAL: + if (state->value_mask == stencil_max_value) { + /* stencil_pass = fragment_mask & ~(s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask); + spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, + state->value_mask & state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_LESS: + if (state->value_mask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference < s) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask); + spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, + state->value_mask & state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, 
tmp_masked_stencil); + } + break; + + case PIPE_FUNC_GREATER: + if (state->value_mask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference > s) */ + /* There's no convenient Compare Less Than Immediate instruction, so + * we'll have to do this one the harder way, by loading a register and + * comparing directly. Compare Logical Greater Than Word (clgt) + * treats its operands as unsigned - no sign extension. + */ + int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + } + else { + /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ + int tmp_reg = spe_allocate_available_register(f); + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->value_mask & state->ref_value); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask); + spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_GEQUAL: + if (state->value_mask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference >= s) + * = fragment_mask & ~(s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, + state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask); + spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, + state->value_mask & state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_LEQUAL: + if (state->value_mask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference <= s) ] + * = fragment_mask & ~(reference > s) */ + /* As above, we have to do this by loading a register */ + int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + } + else { + /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ + int tmp_reg = spe_allocate_available_register(f); + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value & state->value_mask); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->value_mask); + spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_NEVER: + /* stencil_pass = fragment_mask & 0 = 0 */ + spe_load_uint(f, stencil_pass_reg, 0); + break; + + case PIPE_FUNC_ALWAYS: + /* stencil_pass = fragment_mask & 1 = fragment_mask */ + spe_move(f, stencil_pass_reg, fragment_mask_reg); + break; + } + + /* The fragments that passed the stencil test are now in stencil_pass_reg. + * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). 
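+ * (Worked example with made-up values: func == PIPE_FUNC_EQUAL,
+ * ref_value == 0x03, value_mask == 0x0f.  A framebuffer stencil byte of
+ * 0xf3 passes, since (0xf3 & 0x0f) == (0x03 & 0x0f), even though an
+ * unmasked comparison of 0xf3 against 0x03 would fail.)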
+ */ +} + + +/** + * This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply. It does not + * apply any tests. It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. + */ +static void +gen_stencil_values(struct spe_function *f, + uint stencil_op, + uint stencil_ref_value, + uint stencil_max_value, + int fbS_reg, + int newS_reg) +{ + /* The code below assumes that newS_reg and fbS_reg are not the same + * register; if they can be, the calculations below will have to use + * an additional temporary register. For now, mark the assumption + * with an assertion that will fail if they are the same. + */ + ASSERT(fbS_reg != newS_reg); + + /* The code also assumes the the stencil_max_value is of the form + * 2^n-1 and can therefore be used as a mask for the valid bits in + * addition to a maximum. Make sure this is the case as well. + * The clever math below exploits the fact that incrementing a + * binary number serves to flip all the bits of a number starting at + * the LSB and continuing to (and including) the first zero bit + * found. That means that a number and its increment will always + * have at least one bit in common (the high order bit, if nothing + * else) *unless* the number is zero, *or* the number is of a form + * consisting of some number of 1s in the low-order bits followed + * by nothing but 0s in the high-order bits. The latter case + * implies it's of the form 2^n-1. + */ + ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + + switch(stencil_op) { + case PIPE_STENCIL_OP_KEEP: + /* newS = S */ + spe_move(f, newS_reg, fbS_reg); + break; + + case PIPE_STENCIL_OP_ZERO: + /* newS = 0 */ + spe_zero(f, newS_reg); + break; + + case PIPE_STENCIL_OP_REPLACE: + /* newS = stencil reference value */ + spe_load_uint(f, newS_reg, stencil_ref_value); + break; + + case PIPE_STENCIL_OP_INCR: { + /* newS = (s == max ? max : s + 1) */ + int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); + /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ + spe_ai(f, newS_reg, fbS_reg, 1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_DECR: { + /* newS = (s == 0 ? 0 : s - 1) */ + int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); + /* Add Word Immediate with a (-1) value works */ + spe_ai(f, newS_reg, fbS_reg, -1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_INCR_WRAP: + /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can + * do a normal add and mask off the correct bits + */ + spe_ai(f, newS_reg, fbS_reg, 1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_DECR_WRAP: + /* newS = (s == 0 ? 
max : s - 1), but we'll pull the same mask trick as above */ + spe_ai(f, newS_reg, fbS_reg, -1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_INVERT: + /* newS = ~s. We take advantage of the mask/max value to invert only + * the valid bits for the field so we don't have to do an extra "and". + */ + spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); + break; + + default: + ASSERT(0); + } +} + + +/** + * This function generates code to get all the necessary possible + * stencil values. For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, + const struct pipe_stencil_state *stencil, + const uint depth_enabled, + int fbS_reg, + int *fail_reg, + int *zfail_reg, + int *zpass_reg) +{ + uint zfail_op; + + /* Stenciling had better be enabled here */ + ASSERT(stencil->enabled); + + /* If the depth test is not enabled, it is treated as though it always + * passes, which means that the zfail_op is not considered - a + * failing stencil test triggers the fail_op, and a passing one + * triggers the zpass_op + * + * As an optimization, override calculation of the zfail_op values + * if they aren't going to be used. By setting the value of + * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed + * to match the incoming stencil values, and no calculation will + * be done. + */ + if (depth_enabled) { + zfail_op = stencil->zfail_op; + } + else { + zfail_op = PIPE_STENCIL_OP_KEEP; + } + + /* One-sided or front-facing stencil */ + if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { + *fail_reg = fbS_reg; + } + else { + *fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, stencil->fail_op, stencil->ref_value, + 0xff, fbS_reg, *fail_reg); + } + + /* Check the possibly overridden value, not the structure value */ + if (zfail_op == PIPE_STENCIL_OP_KEEP) { + *zfail_reg = fbS_reg; + } + else if (zfail_op == stencil->fail_op) { + *zfail_reg = *fail_reg; + } + else { + *zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, stencil->zfail_op, stencil->ref_value, + 0xff, fbS_reg, *zfail_reg); + } + + if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { + *zpass_reg = fbS_reg; + } + else if (stencil->zpass_op == stencil->fail_op) { + *zpass_reg = *fail_reg; + } + else if (stencil->zpass_op == zfail_op) { + *zpass_reg = *zfail_reg; + } + else { + *zpass_reg = spe_allocate_available_register(f); + gen_stencil_values(f, stencil->zpass_op, stencil->ref_value, + 0xff, fbS_reg, *zpass_reg); + } +} + +/** + * Note that fbZ_reg may *not* be set on entry, if in fact + * the depth test is not enabled. This function must not use + * the register if depth is not enabled. 
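+ *
+ * Returns TRUE if the generated code may have modified the depth and/or
+ * stencil values (so the caller knows whether the tile needs to be
+ * written back).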
+ */ +static boolean +gen_stencil_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + const uint facing, + const int mask_reg, const int fragZ_reg, + const int fbZ_reg, const int fbS_reg) +{ + /* True if we've generated code that could require writeback to the + * depth and/or stencil buffers + */ + boolean modified_buffers = FALSE; + + boolean need_to_calculate_stencil_values; + boolean need_to_writemask_stencil_values; + + struct pipe_stencil_state *stencil; + + /* Registers. We may or may not actually allocate these, depending + * on whether the state values indicate that we need them. + */ + int stencil_pass_reg, stencil_fail_reg; + int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; + int stencil_writemask_reg; + int zmask_reg; + int newS_reg; + + /* Stenciling is quite complex: up to six different configurable stencil + * operations/calculations can be required (three each for front-facing + * and back-facing fragments). Many of those operations will likely + * be identical, so there's good reason to try to avoid calculating + * the same values more than once (which unfortunately makes the code less + * straightforward). + * + * To make register management easier, we start a new + * register set; we can release all the registers in the set at + * once, and avoid having to keep track of exactly which registers + * we allocate. We can still allocate and free registers as + * desired (if we know we no longer need a register), but we don't + * have to spend the complexity to track the more difficult variant + * register usage scenarios. + */ + spe_comment(f, 0, "Allocating stencil register set"); + spe_allocate_register_set(f); + + /* The facing we're given is the fragment facing; it doesn't + * exactly match the stencil facing. If stencil is enabled, + * but two-sided stencil is *not* enabled, we use the same + * stencil settings for both front- and back-facing fragments. + * We only use the "back-facing" stencil for backfacing fragments + * if two-sided stenciling is enabled. + */ + if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { + stencil = &dsa->stencil[1]; + } + else { + stencil = &dsa->stencil[0]; + } + + /* Calculate the writemask. If the writemask is trivial (either + * all 0s, meaning that we don't need to calculate any stencil values + * because they're not going to change the stencil anyway, or all 1s, + * meaning that we have to calculate the stencil values but do not + * need to mask them), we can avoid generating code. Don't forget + * that we need to consider backfacing stencil, if enabled. + * + * Note that if the backface stencil is *not* enabled, the backface + * stencil will have the same values as the frontface stencil. + */ + if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && + stencil->zfail_op == PIPE_STENCIL_OP_KEEP && + stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { + need_to_calculate_stencil_values = FALSE; + need_to_writemask_stencil_values = FALSE; + } + else if (stencil->write_mask == 0x0) { + /* All changes are writemasked out, so no need to calculate + * what those changes might be, and no need to write anything back. + */ + need_to_calculate_stencil_values = FALSE; + need_to_writemask_stencil_values = FALSE; + } + else if (stencil->write_mask == 0xff) { + /* Still trivial, but a little less so. We need to write the stencil + * values, but we don't need to mask them. 
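+ * (A write_mask of 0xff covers every bit of the 8-bit stencil value used
+ * here, so storing the new values directly is equivalent to a masked
+ * store.)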
+ */ + need_to_calculate_stencil_values = TRUE; + need_to_writemask_stencil_values = FALSE; + } + else { + /* The general case: calculate, mask, and write */ + need_to_calculate_stencil_values = TRUE; + need_to_writemask_stencil_values = TRUE; + + /* While we're here, generate code that calculates what the + * writemask should be. If backface stenciling is enabled, + * and the backface writemask is not the same as the frontface + * writemask, we'll have to generate code that merges the + * two masks into a single effective mask based on fragment facing. + */ + spe_comment(f, 0, "Computing stencil writemask"); + stencil_writemask_reg = spe_allocate_available_register(f); + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].write_mask); + } + + /* At least one-sided stenciling must be on. Generate code that + * runs the stencil test on the basic/front-facing stencil, leaving + * the mask of passing stencil bits in stencil_pass_reg. This mask will + * be used both to mask the set of active pixels, and also to + * determine how the stencil buffer changes. + * + * This test will *not* change the value in mask_reg (because we don't + * yet know whether to apply the two-sided stencil or one-sided stencil). + */ + spe_comment(f, 0, "Running basic stencil test"); + stencil_pass_reg = spe_allocate_available_register(f); + gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg); + + /* Generate code that, given the mask of valid fragments and the + * mask of valid fragments that passed the stencil test, computes + * the mask of valid fragments that failed the stencil test. We + * have to do this before we run a depth test (because the + * depth test should not be performed on fragments that failed the + * stencil test, and because the depth test will update the + * mask of valid fragments based on the results of the depth test). + */ + spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); + stencil_fail_reg = spe_allocate_available_register(f); + spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); + /* Now remove the stenciled-out pixels from the valid fragment mask, + * so we can later use the valid fragment mask in the depth test. + */ + spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + + /* We may not need to calculate stencil values, if the writemask is off */ + if (need_to_calculate_stencil_values) { + /* Generate code that calculates exactly which stencil values we need, + * without calculating the same value twice (say, if two different + * stencil ops have the same value). This code will work for one-sided + * and two-sided stenciling (so that we take into account that operations + * may match between front and back stencils), and will also take into + * account whether the depth test is enabled (if the depth test is off, + * we don't need any of the zfail results, because the depth test always + * is considered to pass if it is disabled). Any register value that + * does not need to be calculated will come back with the same value + * that's in fbS_reg. + * + * This function will allocate a variant number of registers that + * will be released as part of the register set. + */ + spe_comment(f, 0, facing == CELL_FACING_FRONT + ? "Computing front-facing stencil values" + : "Computing back-facing stencil values"); + gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg, + &stencil_fail_values, &stencil_pass_depth_fail_values, + &stencil_pass_depth_pass_values); + } + + /* We now have all the stencil values we need. 
We also need + * the results of the depth test to figure out which + * stencil values will become the new stencil values. (Even if + * we aren't actually calculating stencil values, we need to apply + * the depth test if it's enabled.) + * + * The code generated by gen_depth_test() returns the results of the + * test in the given register, but also alters the mask_reg based + * on the results of the test. + */ + if (dsa->depth.enabled) { + spe_comment(f, 0, "Running stencil depth test"); + zmask_reg = spe_allocate_available_register(f); + modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, + fbZ_reg, zmask_reg); + } + + if (need_to_calculate_stencil_values) { + + /* If we need to writemask the stencil values before going into + * the stencil buffer, we'll have to use a new register to + * hold the new values. If not, we can just keep using the + * current register. + */ + if (need_to_writemask_stencil_values) { + newS_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Saving current stencil values for writemasking"); + spe_move(f, newS_reg, fbS_reg); + } + else { + newS_reg = fbS_reg; + } + + /* Merge in the selected stencil fail values */ + if (stencil_fail_values != fbS_reg) { + spe_comment(f, 0, "Loading stencil fail values"); + spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + modified_buffers = TRUE; + } + + /* Same for the stencil pass/depth fail values. If this calculation + * is not needed (say, if depth test is off), then the + * stencil_pass_depth_fail_values register will be equal to fbS_reg + * and we'll skip the calculation. + */ + if (stencil_pass_depth_fail_values != fbS_reg) { + /* We don't actually have a stencil pass/depth fail mask yet. + * Calculate it here from the stencil passing mask and the + * depth passing mask. Note that zmask_reg *must* have been + * set above if we're here. + */ + uint stencil_pass_depth_fail_mask = + spe_allocate_available_register(f); + + spe_comment(f, 0, "Loading stencil pass/depth fail values"); + spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, + stencil_pass_depth_fail_mask); + + spe_release_register(f, stencil_pass_depth_fail_mask); + modified_buffers = TRUE; + } + + /* Same for the stencil pass/depth pass mask. Note that we + * *can* get here with zmask_reg being unset (if the depth + * test is off but the stencil test is on). In this case, + * we assume the depth test passes, and don't need to mask + * the stencil pass mask with the Z mask. + */ + if (stencil_pass_depth_pass_values != fbS_reg) { + if (dsa->depth.enabled) { + uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + /* We'll need a separate register */ + spe_comment(f, 0, "Loading stencil pass/depth pass values"); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + else { + /* We can use the same stencil-pass register */ + spe_comment(f, 0, "Loading stencil pass values"); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); + } + modified_buffers = TRUE; + } + + /* Almost done. If we need to writemask, do it now, leaving the + * results in the fbS_reg register passed in. If we don't need + * to writemask, then the results are *already* in the fbS_reg, + * so there's nothing more to do. 
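+ * The scalar equivalent of the writemasked merge below is roughly
+ *    fbS = (newS & writemask) | (fbS & ~writemask);
+ * which is what the spe_selb() call emits.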
+ */ + + if (need_to_writemask_stencil_values && modified_buffers) { + /* The Select Bytes command makes a fine writemask. Where + * the mask is 0, the first (original) values are retained, + * effectively masking out changes. Where the mask is 1, the + * second (new) values are retained, incorporating changes. + */ + spe_comment(f, 0, "Writemasking new stencil values"); + spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); + } + + } /* done calculating stencil values */ + + /* The stencil and/or depth values have been applied, and the + * mask_reg, fbS_reg, and fbZ_reg values have been updated. + * We're all done, except that we've allocated a fair number + * of registers that we didn't bother tracking. Release all + * those registers as part of the register set, and go home. + */ + spe_comment(f, 0, "Releasing stencil register set"); + spe_release_register_set(f); + + /* Return TRUE if we could have modified the stencil and/or + * depth buffers. + */ + return modified_buffers; +} + + +/** + * Generate depth and/or stencil test code. + * \param cell context + * \param dsa depth/stencil/alpha state + * \param f spe function to emit + * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK + * \param mask_reg register containing the pixel alive/dead mask + * \param depth_tile_reg register containing address of z/stencil tile + * \param quad_offset_reg offset to quad from start of tile + * \param fragZ_reg register containg fragment Z values + */ +static void +gen_depth_stencil(struct cell_context *cell, + const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, + uint facing, + int mask_reg, + int depth_tile_reg, + int quad_offset_reg, + int fragZ_reg) + +{ + const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; + boolean write_depth_stencil; + + /* framebuffer's combined z/stencil values register */ + int fbZS_reg = spe_allocate_available_register(f); + + /* Framebufer Z values register */ + int fbZ_reg = spe_allocate_available_register(f); + + /* Framebuffer stencil values register (may not be used) */ + int fbS_reg = spe_allocate_available_register(f); + + /* 24-bit mask register (may not be used) */ + int zmask_reg = spe_allocate_available_register(f); + + /** + * The following code: + * 1. fetch quad of packed Z/S values from the framebuffer tile. + * 2. extract the separate the Z and S values from packed values + * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints + * + * The instructions for doing this are interleaved for better performance. 
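+ * For the S8Z24/X8Z24 layouts handled below, the per-pixel equivalent
+ * is roughly:
+ *    z24 = ((uint) (fragZ * 2^32)) >> 8;   /* cfltu then rotmi */
+ *    fbZ = fbZS & 0x00ffffff;
+ *    fbS = fbZS >> 24;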
+ */ + spe_comment(f, 0, "Fetch Z/stencil quad from tile"); + + switch(zs_format) { + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: + /* prepare mask to extract Z vals from ZS vals */ + spe_load_uint(f, zmask_reg, 0x00ffffff); + + /* convert fragment Z from [0,1] to 32-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* right shift 32-bit fragment Z to 24 bits */ + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + + /* extract 24-bit Z values from ZS values by masking */ + spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); + + /* extract 8-bit stencil values by shifting */ + spe_rotmi(f, fbS_reg, fbZS_reg, -24); + break; + + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + /* convert fragment Z from [0,1] to 32-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* right shift 32-bit fragment Z to 24 bits */ + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + + /* extract 24-bit Z values from ZS values by shifting */ + spe_rotmi(f, fbZ_reg, fbZS_reg, -8); + + /* extract 8-bit stencil values by masking */ + spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); + break; + + case PIPE_FORMAT_Z32_UNORM: + /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); + + /* convert fragment Z from [0,1] to 32-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + + /* No stencil, so can't do anything there */ + break; + + case PIPE_FORMAT_Z16_UNORM: + /* XXX This code for 16bpp Z is broken! */ + + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* convert Z from [0,1] to 16-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); + /* No stencil */ + break; + + default: + ASSERT(0); /* invalid format */ + } + + /* If stencil is enabled, use the stencil-specific code + * generator to generate both the stencil and depth (if needed) + * tests. Otherwise, if only depth is enabled, generate + * a quick depth test. The test generators themselves will + * report back whether the depth/stencil buffer has to be + * written back. + */ + if (dsa->stencil[0].enabled) { + /* This will perform the stencil and depth tests, and update + * the mask_reg, fbZ_reg, and fbS_reg as required by the + * tests. + */ + ASSERT(fbS_reg >= 0); + spe_comment(f, 0, "Perform stencil test"); + + /* Note that fbZ_reg may not be set on entry, if stenciling + * is enabled but there's no Z-buffer. The + * gen_stencil_depth_test() function must ignore the + * fbZ_reg register if depth is not enabled. + */ + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, + mask_reg, fragZ_reg, + fbZ_reg, fbS_reg); + } + else if (dsa->depth.enabled) { + int zmask_reg = spe_allocate_available_register(f); + ASSERT(fbZ_reg >= 0); + spe_comment(f, 0, "Perform depth test"); + write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, + fbZ_reg, zmask_reg); + spe_release_register(f, zmask_reg); + } + else { + write_depth_stencil = FALSE; + } + + if (write_depth_stencil) { + /* Merge latest Z and Stencil values into fbZS_reg. + * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. 
+ * fbS_reg has four 8-bit Z values in bits [7..0]. + */ + spe_comment(f, 0, "Store quad's depth/stencil values in tile"); + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z32_UNORM) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } + else if (zs_format == PIPE_FORMAT_S8_UNORM) { + ASSERT(0); /* XXX to do */ + } + else { + ASSERT(0); /* bad zs_format */ + } + + /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ + spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + } + + /* Don't need these any more */ + spe_release_register(f, fbZS_reg); + spe_release_register(f, fbZ_reg); + spe_release_register(f, fbS_reg); + spe_release_register(f, zmask_reg); +} + + + +/** + * Generate SPE code to implement the fragment operations (alpha test, + * depth test, stencil test, blending, colormask, and final + * framebuffer write) as specified by the current context state. + * + * Logically, this code will be called after running the fragment + * shader. But under some circumstances we could run some of this + * code before the fragment shader to cull fragments/quads that are + * totally occluded/discarded. + * + * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * + * See the spu_default_fragment_ops() function to see how the per-fragment + * operations would be done with ordinary C code. + * The code we generate here though has no branches, is SIMD, etc and + * should be much faster. + * + * \param cell the rendering context (in) + * \param facing whether the generated code is for front-facing or + * back-facing fragments + * \param f the generated function (in/out); on input, the function + * must already have been initialized. On exit, whatever + * instructions within the generated function have had + * the fragment ops appended. + */ +void +cell_gen_fragment_function(struct cell_context *cell, + const uint facing, + struct spe_function *f) +{ + const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; + const struct pipe_blend_state *blend = cell->blend; + const struct pipe_blend_color *blend_color = &cell->blend_color; + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ + const int x_reg = 3; /* uint */ + const int y_reg = 4; /* uint */ + const int color_tile_reg = 5; /* tile_t * */ + const int depth_tile_reg = 6; /* tile_t * */ + const int fragZ_reg = 7; /* vector float */ + const int fragR_reg = 8; /* vector float */ + const int fragG_reg = 9; /* vector float */ + const int fragB_reg = 10; /* vector float */ + const int fragA_reg = 11; /* vector float */ + const int mask_reg = 12; /* vector uint */ + + ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); + + /* offset of quad from start of tile + * XXX assuming 4-byte pixels for color AND Z/stencil!!!! 
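+ * With 32x32-pixel tiles, 2x2-pixel quads and 4 bytes per pixel, the
+ * byte offset of a quad within its tile works out to
+ *    offset = ((y/2) * 16 + (x/2)) * 16
+ * (16 quads per quad-row, 16 bytes per quad), which is what the
+ * shift/add sequence below computes.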
+ */ + int quad_offset_reg; + + int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ + + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, TRUE); + spe_indent(f, 8); + spe_comment(f, -4, facing == CELL_FACING_FRONT + ? "Begin front-facing per-fragment ops" + : "Begin back-facing per-fragment ops"); + } + + spe_allocate_register(f, x_reg); + spe_allocate_register(f, y_reg); + spe_allocate_register(f, color_tile_reg); + spe_allocate_register(f, depth_tile_reg); + spe_allocate_register(f, fragZ_reg); + spe_allocate_register(f, fragR_reg); + spe_allocate_register(f, fragG_reg); + spe_allocate_register(f, fragB_reg); + spe_allocate_register(f, fragA_reg); + spe_allocate_register(f, mask_reg); + + quad_offset_reg = spe_allocate_available_register(f); + fbRGBA_reg = spe_allocate_available_register(f); + + /* compute offset of quad from start of tile, in bytes */ + { + int x2_reg = spe_allocate_available_register(f); + int y2_reg = spe_allocate_available_register(f); + + ASSERT(TILE_SIZE == 32); + + spe_comment(f, 0, "Compute quad offset within tile"); + spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ + spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ + spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ + spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ + + spe_release_register(f, x2_reg); + spe_release_register(f, y2_reg); + } + + /* Generate the alpha test, if needed. */ + if (dsa->alpha.enabled) { + gen_alpha_test(dsa, f, mask_reg, fragA_reg); + } + + /* generate depth and/or stencil test code */ + if (dsa->depth.enabled || dsa->stencil[0].enabled) { + gen_depth_stencil(cell, dsa, f, + facing, + mask_reg, + depth_tile_reg, + quad_offset_reg, + fragZ_reg); + } + + /* Get framebuffer quad/colors. We'll need these for blending, + * color masking, and to obey the quad/pixel mask. + * Load: fbRGBA_reg = memory[color_tile + quad_offset] + * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking + * we could skip this load. + */ + spe_comment(f, 0, "Fetch quad colors from tile"); + spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); + + if (blend->blend_enable) { + spe_comment(f, 0, "Perform blending"); + gen_blend(blend, blend_color, f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); + } + + /* + * Write fragment colors to framebuffer/tile. + * This involves converting the fragment colors from float[4] to the + * tile's specific format and obeying the quad/pixel mask. 
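+ * For an 8-bit-per-channel format the packing amounts to something like
+ *    packed = (a8 << 24) | (r8 << 16) | (g8 << 8) | b8;
+ * per pixel (channel order depends on color_format), followed by a
+ * select against mask_reg so that killed fragments keep the old
+ * framebuffer color.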
+ */ + { + int rgba_reg = spe_allocate_available_register(f); + + /* Pack four float colors as four 32-bit int colors */ + spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); + gen_pack_colors(f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, + rgba_reg); + + if (blend->logicop_enable) { + spe_comment(f, 0, "Compute logic op"); + gen_logicop(blend, f, rgba_reg, fbRGBA_reg); + } + + if (blend->colormask != PIPE_MASK_RGBA) { + spe_comment(f, 0, "Compute color mask"); + gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); + } + + /* Mix fragment colors with framebuffer colors using the quad/pixel mask: + * if (mask[i]) + * rgba[i] = rgba[i]; + * else + * rgba[i] = framebuffer[i]; + */ + spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); + + /* Store updated quad in tile: + * memory[color_tile + quad_offset] = rgba_reg; + */ + spe_comment(f, 0, "Store quad colors into color tile"); + spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); + + spe_release_register(f, rgba_reg); + } + + //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + + spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ + + spe_release_register(f, fbRGBA_reg); + spe_release_register(f, quad_offset_reg); + + if (cell->debug_flags & CELL_DEBUG_ASM) { + char buffer[1024]; + sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", + facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst); + spe_comment(f, -4, buffer); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h new file mode 100644 index 0000000000..21b35d1faf --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_GEN_FRAGMENT_H +#define CELL_GEN_FRAGMENT_H + + +extern void +cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); + + +#endif /* CELL_GEN_FRAGMENT_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c new file mode 100644 index 0000000000..81efd137c7 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -0,0 +1,358 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "draw/draw_context.h" +#include "cell_context.h" +#include "cell_flush.h" +#include "cell_pipe_state.h" +#include "cell_state.h" +#include "cell_texture.h" + + + +static void * +cell_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + + +static void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend = (struct pipe_blend_state *) blend; + cell->dirty |= CELL_NEW_BLEND; +} + + +static void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + + +static void +cell_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend_color = *blend_color; + + cell->dirty |= CELL_NEW_BLEND; +} + + + + +static void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *dsa) +{ + return mem_dup(dsa, sizeof(*dsa)); +} + + +static void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, + void *dsa) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; + cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +static void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) +{ + FREE(dsa); +} + + +static void +cell_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void +cell_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct cell_context *cell = cell_context(pipe); + + cell->viewport = *viewport; /* struct copy */ + cell->dirty |= CELL_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(cell->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. 
+ */ +} + + +static void +cell_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->scissor, scissor, sizeof(*scissor) ); + cell->dirty |= CELL_NEW_SCISSOR; +} + + +static void +cell_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); + cell->dirty |= CELL_NEW_STIPPLE; +} + + + +static void * +cell_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + return mem_dup(rasterizer, sizeof(*rasterizer)); +} + + +static void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) +{ + struct pipe_rasterizer_state *rasterizer = + (struct pipe_rasterizer_state *) rast; + struct cell_context *cell = cell_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(cell->draw, rasterizer); + + cell->rasterizer = rasterizer; + + cell->dirty |= CELL_NEW_RASTERIZER; +} + + +static void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} + + + +static void * +cell_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + + +static void +cell_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **samplers) +{ + struct cell_context *cell = cell_context(pipe); + uint i, changed = 0x0; + + assert(num <= CELL_MAX_SAMPLERS); + + draw_flush(cell->draw); + + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; + if (cell->sampler[i] != new_samp) { + cell->sampler[i] = new_samp; + changed |= (1 << i); + } + } + + if (changed) { + cell->dirty |= CELL_NEW_SAMPLER; + cell->dirty_samplers |= changed; + } +} + + +static void +cell_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + +static void +cell_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct cell_context *cell = cell_context(pipe); + uint i, changed = 0x0; + + assert(num <= CELL_MAX_SAMPLERS); + + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + struct pipe_texture *new_tex = i < num ? 
texture[i] : NULL; + if ((struct pipe_texture *) cell->texture[i] != new_tex) { + pipe_texture_reference((struct pipe_texture **) &cell->texture[i], + new_tex); + changed |= (1 << i); + } + } + + cell->num_textures = num; + + if (changed) { + cell->dirty |= CELL_NEW_TEXTURE; + cell->dirty_textures |= changed; + } +} + + + +static void +cell_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct cell_context *cell = cell_context(pipe); + + if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { + struct pipe_surface *csurf = fb->cbufs[0]; + struct pipe_surface *zsurf = fb->zsbuf; + uint i; + uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); + + /* unmap old surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { + pipe_surface_unmap(cell->framebuffer.cbufs[i]); + cell->cbuf_map[i] = NULL; + } + } + + if (cell->framebuffer.zsbuf && cell->zsbuf_map) { + pipe_surface_unmap(cell->framebuffer.zsbuf); + cell->zsbuf_map = NULL; + } + + /* Finish any pending rendering to the current surface before + * installing a new surface! + */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + + /* update my state + * (this is also where old surfaces will finally get freed) + */ + cell->framebuffer.width = fb->width; + cell->framebuffer.height = fb->height; + cell->framebuffer.num_cbufs = fb->num_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); + + /* map new surfaces */ + if (csurf) + cell->cbuf_map[0] = pipe_surface_map(csurf, flags); + + if (zsurf) + cell->zsbuf_map = pipe_surface_map(zsurf, flags); + + cell->dirty |= CELL_NEW_FRAMEBUFFER; + } +} + + + +void +cell_init_state_functions(struct cell_context *cell) +{ + cell->pipe.create_blend_state = cell_create_blend_state; + cell->pipe.bind_blend_state = cell_bind_blend_state; + cell->pipe.delete_blend_state = cell_delete_blend_state; + + cell->pipe.create_sampler_state = cell_create_sampler_state; + cell->pipe.bind_sampler_states = cell_bind_sampler_states; + cell->pipe.delete_sampler_state = cell_delete_sampler_state; + + cell->pipe.set_sampler_textures = cell_set_sampler_textures; + + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; + cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; + cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + + cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; + cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; + cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + + cell->pipe.set_blend_color = cell_set_blend_color; + cell->pipe.set_clip_state = cell_set_clip_state; + + cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + + cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; + cell->pipe.set_scissor_state = cell_set_scissor_state; + cell->pipe.set_viewport_state = cell_set_viewport_state; +} diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h new file mode 100644 index 0000000000..1889bd52ff --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_PIPE_STATE_H +#define CELL_PIPE_STATE_H + + +struct cell_context; + +extern void +cell_init_state_functions(struct cell_context *cell); + + +#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c new file mode 100644 index 0000000000..79cb8df82f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -0,0 +1,211 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Last stage of 'draw' pipeline: send tris to SPUs. 
+ * \author Brian Paul + */ + +#include "cell_context.h" +#include "cell_render.h" +#include "cell_spu.h" +#include "util/u_memory.h" +#include "draw/draw_private.h" + + +struct render_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct cell_context *cell; +}; + + +static INLINE struct render_stage * +render_stage(struct draw_stage *stage) +{ + return (struct render_stage *) stage; +} + + +static void render_begin( struct draw_stage *stage ) +{ +#if 0 + struct render_stage *render = render_stage(stage); + struct cell_context *sp = render->cell; + const struct pipe_shader_state *fs = &render->cell->fs->shader; + render->quad.nr_attrs = render->cell->nr_frag_attrs; + + render->firstFpInput = fs->input_semantic_name[0]; + + sp->quad.first->begin(sp->quad.first); +#endif +} + + +static void render_end( struct draw_stage *stage ) +{ +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ + struct render_stage *render = render_stage(stage); + /*render->cell->line_stipple_counter = 0;*/ +} + + +static void +render_point(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +static void +render_line(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +/** Write a vertex into the prim buffer */ +static void +save_vertex(struct cell_prim_buffer *buf, uint pos, + const struct vertex_header *vert) +{ + uint attr, j; + + for (attr = 0; attr < 2; attr++) { + for (j = 0; j < 4; j++) { + buf->vertex[pos][attr][j] = vert->data[attr][j]; + } + } + + /* update bounding box */ + if (vert->data[0][0] < buf->xmin) + buf->xmin = vert->data[0][0]; + if (vert->data[0][0] > buf->xmax) + buf->xmax = vert->data[0][0]; + if (vert->data[0][1] < buf->ymin) + buf->ymin = vert->data[0][1]; + if (vert->data[0][1] > buf->ymax) + buf->ymax = vert->data[0][1]; +} + + +static void +render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + struct render_stage *rs = render_stage(stage); + struct cell_context *cell = rs->cell; + struct cell_prim_buffer *buf = &cell->prim_buffer; + uint i; + + if (buf->num_verts + 3 > CELL_MAX_VERTS) { + cell_flush_prim_buffer(cell); + } + + i = buf->num_verts; + assert(i+2 <= CELL_MAX_VERTS); + save_vertex(buf, i+0, prim->v[0]); + save_vertex(buf, i+1, prim->v[1]); + save_vertex(buf, i+2, prim->v[2]); + buf->num_verts += 3; +} + + +/** + * Send the a RENDER command to all SPUs to have them render the prims + * in the current prim_buffer. + */ +void +cell_flush_prim_buffer(struct cell_context *cell) +{ + uint i; + + if (cell->prim_buffer.num_verts == 0) + return; + + for (i = 0; i < cell->num_spus; i++) { + struct cell_command_render *render = &cell_global.command[i].render; + render->prim_type = PIPE_PRIM_TRIANGLES; + render->num_verts = cell->prim_buffer.num_verts; + render->front_winding = cell->rasterizer->front_winding; + render->vertex_size = cell->vertex_info->size * 4; + render->xmin = cell->prim_buffer.xmin; + render->ymin = cell->prim_buffer.ymin; + render->xmax = cell->prim_buffer.xmax; + render->ymax = cell->prim_buffer.ymax; + render->vertex_data = &cell->prim_buffer.vertex; + ASSERT_ALIGN16(render->vertex_data); + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); + } + + cell->prim_buffer.num_verts = 0; + + cell->prim_buffer.xmin = 1e100; + cell->prim_buffer.ymin = 1e100; + cell->prim_buffer.xmax = -1e100; + cell->prim_buffer.ymax = -1e100; + + /* XXX temporary, need to double-buffer the prim buffer until we get + * a real command buffer/list system. 
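+ * Until then, the flush below keeps the PPU from refilling the single
+ * vertex buffer while the SPUs may still be reading it.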
+ */ + cell_flush(&cell->pipe, 0x0); +} + + + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new draw/render stage. This will be plugged into the + * draw module as the last pipeline stage. + */ +struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) +{ + struct render_stage *render = CALLOC_STRUCT(render_stage); + + render->cell = cell; + render->stage.draw = cell->draw; + render->stage.begin = render_begin; + render->stage.point = render_point; + render->stage.line = render_line; + render->stage.tri = render_tri; + render->stage.end = render_end; + render->stage.reset_stipple_counter = reset_stipple_counter; + render->stage.destroy = render_destroy; + + /* + render->quad.coef = render->coef; + render->quad.posCoef = &render->posCoef; + */ + + return &render->stage; +} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h new file mode 100644 index 0000000000..826dcbafeb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_RENDER_H +#define CELL_RENDER_H + +struct cell_context; +struct draw_stage; + +extern void +cell_flush_prim_buffer(struct cell_context *cell); + +extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); + +#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c new file mode 100644 index 0000000000..6fc2257e2a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -0,0 +1,174 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "cell/common.h" +#include "cell_screen.h" +#include "cell_texture.h" +#include "cell_winsys.h" + + +static const char * +cell_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +cell_get_name(struct pipe_screen *screen) +{ + return "Cell"; +} + + +static int +cell_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return CELL_MAX_SAMPLERS; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 10; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return CELL_MAX_TEXTURE_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return CELL_MAX_TEXTURE_LEVELS; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; /* XXX not really true */ + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; /* XXX to do */ + default: + return 0; + } +} + + +static float +cell_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + + +static boolean +cell_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + /* cell supports most formats, XXX for now anyway */ + if (format == PIPE_FORMAT_DXT5_RGBA || + format == PIPE_FORMAT_R8G8B8A8_SRGB) + return FALSE; + else + return TRUE; +} + + +static void +cell_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys 
*winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no cell_screen) but + * that would be the place to put SPU thread/context info... + */ +struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys) +{ + struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + + if (!screen) + return NULL; + + screen->winsys = winsys; + + screen->destroy = cell_destroy_screen; + + screen->get_name = cell_get_name; + screen->get_vendor = cell_get_vendor; + screen->get_param = cell_get_param; + screen->get_paramf = cell_get_paramf; + screen->is_format_supported = cell_is_format_supported; + + cell_init_screen_texture_funcs(screen); + + return screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h new file mode 100644 index 0000000000..c7e15889d6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_SCREEN_H +#define CELL_SCREEN_H + + +struct pipe_screen; +struct pipe_winsys; + + +extern struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys); + + +#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c new file mode 100644 index 0000000000..28e5e6d706 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -0,0 +1,219 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Utility/wrappers for communicating with the SPUs. + */ + + +#include <pthread.h> + +#include "cell_spu.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "cell/common.h" + + +/* +helpful headers: +/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h +*/ + + +/** + * Cell/SPU info that's not per-context. + */ +struct cell_global_info cell_global; + + +/** + * Scan /proc/cpuinfo to determine the timebase for the system. + * This is used by the SPUs to convert 'decrementer' ticks to seconds. + * There may be a better way to get this value... + */ +static unsigned +get_timebase(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + unsigned timebase; + + assert(f); + while (!feof(f)) { + char line[80]; + fgets(line, sizeof(line), f); + if (strncmp(line, "timebase", 8) == 0) { + char *colon = strchr(line, ':'); + if (colon) { + timebase = atoi(colon + 2); + break; + } + } + } + fclose(f); + + return timebase; +} + + +/** + * Write a 1-word message to the given SPE mailbox. + */ +void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) +{ + spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); +} + + +/** + * Wait for a 1-word message to arrive in given mailbox. + */ +uint +wait_mbox_message(spe_context_ptr_t ctx) +{ + do { + unsigned data; + int count = spe_out_mbox_read(ctx, &data, 1); + + if (count == 1) { + return data; + } + + if (count < 0) { + /* error */ ; + } + } while (1); +} + + +/** + * Called by pthread_create() to spawn an SPU thread. + */ +static void * +cell_thread_function(void *arg) +{ + struct cell_init_info *init = (struct cell_init_info *) arg; + unsigned entry = SPE_DEFAULT_ENTRY; + + ASSERT_ALIGN16(init); + + if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, + init, NULL, NULL) < 0) { + fprintf(stderr, "spe_context_run() failed\n"); + exit(1); + } + + pthread_exit(NULL); +} + + +/** + * Create the SPU threads. This is done once during driver initialization. + * This involves setting the the "init" message which is sent to each SPU. + * The init message specifies an SPU id, total number of SPUs, location + * and number of batch buffers, etc. 
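+ * On the SPU side, each thread gets the address of its init struct as
+ * the argp argument to main() and can pull it into local store with
+ * something like
+ *    mfc_get(&init, (uint64_t) argp, sizeof(init), tag, 0, 0);
+ *    mfc_write_tag_mask(1 << tag);
+ *    mfc_read_tag_status_all();
+ * (illustrative sketch; the real SPU entry point lives in the spu/ code).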
+ */ +void +cell_start_spus(struct cell_context *cell) +{ + static boolean one_time_init = FALSE; + uint i, j; + uint timebase = get_timebase(); + + if (one_time_init) { + fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " + "on Cell.\n"); + abort(); + } + + one_time_init = TRUE; + + assert(cell->num_spus <= CELL_MAX_SPUS); + + ASSERT_ALIGN16(&cell_global.inits[0]); + ASSERT_ALIGN16(&cell_global.inits[1]); + + /* + * Initialize the global 'inits' structure for each SPU. + * A pointer to the init struct will be passed to each SPU. + * The SPUs will then each grab their init info with mfc_get(). + */ + for (i = 0; i < cell->num_spus; i++) { + cell_global.inits[i].id = i; + cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].debug_flags = cell->debug_flags; + cell_global.inits[i].inv_timebase = 1000.0f / timebase; + + for (j = 0; j < CELL_NUM_BUFFERS; j++) { + cell_global.inits[i].buffers[j] = cell->buffer[j]; + } + cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + + cell_global.inits[i].spu_functions = &cell->spu_functions; + + cell_global.spe_contexts[i] = spe_context_create(0, NULL); + if (!cell_global.spe_contexts[i]) { + fprintf(stderr, "spe_context_create() failed\n"); + exit(1); + } + + if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { + fprintf(stderr, "spe_program_load() failed\n"); + exit(1); + } + + pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ + NULL, /* pthread attribs */ + &cell_thread_function, /* start routine */ + &cell_global.inits[i]); /* thread argument */ + } +} + + +/** + * Tell all the SPUs to stop/exit. + * This is done when the driver's exiting / cleaning up. + */ +void +cell_spu_exit(struct cell_context *cell) +{ + uint i; + + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); + } + + /* wait for threads to exit */ + for (i = 0; i < cell->num_spus; i++) { + void *value; + pthread_join(cell_global.spe_threads[i], &value); + cell_global.spe_threads[i] = 0; + cell_global.spe_contexts[i] = 0; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h new file mode 100644 index 0000000000..c93958a9ed --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_SPU +#define CELL_SPU + + +#include <libspe2.h> +#include <pthread.h> +#include "cell/common.h" + +#include "cell_context.h" + + +/** + * Global vars, for now anyway. + */ +struct cell_global_info +{ + /** + * SPU/SPE handles, etc + */ + spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; + pthread_t spe_threads[CELL_MAX_SPUS]; + + /** + * Data sent to SPUs at start-up + */ + struct cell_init_info inits[CELL_MAX_SPUS]; +}; + + +extern struct cell_global_info cell_global; + + +/** This is the handle for the actual SPE code */ +extern spe_program_handle_t g3d_spu; + + +extern void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); + +extern uint +wait_mbox_message(spe_context_ptr_t ctx); + + +extern void +cell_start_spus(struct cell_context *cell); + + +extern void +cell_spu_exit(struct cell_context *cell); + + +#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h new file mode 100644 index 0000000000..b193170f9c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_STATE_H +#define CELL_STATE_H + + +#define CELL_NEW_VIEWPORT 0x1 +#define CELL_NEW_RASTERIZER 0x2 +#define CELL_NEW_FS 0x4 +#define CELL_NEW_BLEND 0x8 +#define CELL_NEW_CLIP 0x10 +#define CELL_NEW_SCISSOR 0x20 +#define CELL_NEW_STIPPLE 0x40 +#define CELL_NEW_FRAMEBUFFER 0x80 +#define CELL_NEW_ALPHA_TEST 0x100 +#define CELL_NEW_DEPTH_STENCIL 0x200 +#define CELL_NEW_SAMPLER 0x400 +#define CELL_NEW_TEXTURE 0x800 +#define CELL_NEW_VERTEX 0x1000 +#define CELL_NEW_VS 0x2000 +#define CELL_NEW_VS_CONSTANTS 0x4000 +#define CELL_NEW_FS_CONSTANTS 0x8000 +#define CELL_NEW_VERTEX_INFO 0x10000 + + +extern void +cell_update_derived( struct cell_context *softpipe ); + + +extern void +cell_init_shader_functions(struct cell_context *cell); + + +extern void +cell_init_vertex_functions(struct cell_context *cell); + + +#endif /* CELL_STATE_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c new file mode 100644 index 0000000000..efc4f78364 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_state.h" +#include "cell_state_emit.h" + + +/** + * Determine how to map vertex program outputs to fragment program inputs. + * Basically, this will be used when computing the triangle interpolation + * coefficients from the post-transform vertex attributes. + */ +static void +calculate_vertex_layout( struct cell_context *cell ) +{ + const struct cell_fragment_shader_state *fs = cell->fs; + const enum interp_mode colorInterp + = cell->rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo = &cell->vertex_info; + uint i; + int src; + +#if 0 + if (cell->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; + } +#endif + + /* reset vinfo */ + vinfo->num_attribs = 0; + + /* we always want to emit vertex pos */ + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + /* already done above */ + break; + + case TGSI_SEMANTIC_COLOR: + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + fs->info.input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); +#if 1 + if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ + src = 0; +#endif + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + fs->info.input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + /* XXX only signal this if format really changes */ + cell->dirty |= CELL_NEW_VERTEX_INFO; +} + + +#if 0 +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct cell_context *sp) +{ + uint surfWidth = sp->framebuffer.width; + uint surfHeight = sp->framebuffer.height; + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} +#endif + + + +/** + * Update derived state, send current state to SPUs prior to rendering. + */ +void cell_update_derived( struct cell_context *cell ) +{ + if (cell->dirty & (CELL_NEW_RASTERIZER | + CELL_NEW_FS | + CELL_NEW_VS)) + calculate_vertex_layout( cell ); + +#if 0 + if (cell->dirty & (CELL_NEW_SCISSOR | + CELL_NEW_DEPTH_STENCIL_ALPHA | + CELL_NEW_FRAMEBUFFER)) + compute_cliprect(cell); +#endif + + cell_emit_state(cell); + + cell->dirty = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c new file mode 100644 index 0000000000..39b85faeb8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -0,0 +1,340 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" +#include "cell_state.h" +#include "cell_state_emit.h" +#include "cell_batch.h" +#include "cell_texture.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" + + +/** + * Find/create a cell_command_fragment_ops object corresponding to the + * current blend/stencil/z/colormask/etc. state. + */ +static struct cell_command_fragment_ops * +lookup_fragment_ops(struct cell_context *cell) +{ + struct cell_fragment_ops_key key; + struct cell_command_fragment_ops *ops; + + /* + * Build key + */ + memset(&key, 0, sizeof(key)); + key.blend = *cell->blend; + key.blend_color = cell->blend_color; + key.dsa = *cell->depth_stencil; + + if (cell->framebuffer.cbufs[0]) + key.color_format = cell->framebuffer.cbufs[0]->format; + else + key.color_format = PIPE_FORMAT_NONE; + + if (cell->framebuffer.zsbuf) + key.zs_format = cell->framebuffer.zsbuf->format; + else + key.zs_format = PIPE_FORMAT_NONE; + + /* + * Look up key in cache. + */ + ops = (struct cell_command_fragment_ops *) + util_keymap_lookup(cell->fragment_ops_cache, &key); + + /* + * If not found, create/save new fragment ops command. + */ + if (!ops) { + struct spe_function spe_code_front, spe_code_back; + unsigned int facing_dependent, total_code_size; + + if (0) + debug_printf("**** Create New Fragment Ops\n"); + + /* Prepare the buffer that will hold the generated code. The + * "0" passed in for the size means that the SPE code will + * use a default size. + */ + spe_init_func(&spe_code_front, 0); + spe_init_func(&spe_code_back, 0); + + /* Generate new code. Always generate new code for both front-facing + * and back-facing fragments, even if it's the same code in both + * cases. + */ + cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); + cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); + + /* Make sure the code is a multiple of 8 bytes long; this is + * required to ensure that the dual pipe instruction alignment + * is correct. It's also important for the SPU unpacking, + * which assumes 8-byte boundaries. 
+ */ + unsigned int front_code_size = spe_code_size(&spe_code_front); + while (front_code_size % 8 != 0) { + spe_lnop(&spe_code_front); + front_code_size = spe_code_size(&spe_code_front); + } + unsigned int back_code_size = spe_code_size(&spe_code_back); + while (back_code_size % 8 != 0) { + spe_lnop(&spe_code_back); + back_code_size = spe_code_size(&spe_code_back); + } + + /* Determine whether the code we generated is facing-dependent, by + * determining whether the generated code is different for the front- + * and back-facing fragments. + */ + if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { + /* Code is identical; only need one copy. */ + facing_dependent = 0; + total_code_size = front_code_size; + } + else { + /* Code is different for front-facing and back-facing fragments. + * Need to send both copies. + */ + facing_dependent = 1; + total_code_size = front_code_size + back_code_size; + } + + /* alloc new fragment ops command. Note that this structure + * has variant length based on the total code size required. + */ + ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); + /* populate the new cell_command_fragment_ops object */ + ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; + ops->total_code_size = total_code_size; + ops->front_code_index = 0; + memcpy(ops->code, spe_code_front.store, front_code_size); + if (facing_dependent) { + /* We have separate front- and back-facing code. Append the + * back-facing code to the buffer. Be careful because the code + * size is in bytes, but the buffer is of unsigned elements. + */ + ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); + memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); + } + else { + /* Use the same code for front- and back-facing fragments */ + ops->back_code_index = ops->front_code_index; + } + + /* Set the fields for the fallback case. Note that these fields + * (and the whole fallback case) will eventually go away. + */ + ops->dsa = *cell->depth_stencil; + ops->blend = *cell->blend; + ops->blend_color = cell->blend_color; + + /* insert cell_command_fragment_ops object into keymap/cache */ + util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); + + /* release rtasm buffer */ + spe_release_func(&spe_code_front); + spe_release_func(&spe_code_back); + } + else { + if (0) + debug_printf("**** Re-use Fragment Ops\n"); + } + + return ops; +} + + + +static void +emit_state_cmd(struct cell_context *cell, uint cmd, + const void *state, uint state_size) +{ + uint32_t *dst = (uint32_t *) + cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); + *dst = cmd; + memcpy(dst + 4, state, state_size); +} + + +/** + * For state marked as 'dirty', construct a state-update command block + * and insert it into the current batch buffer. + */ +void +cell_emit_state(struct cell_context *cell) +{ + if (cell->dirty & CELL_NEW_FRAMEBUFFER) { + struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; + struct pipe_surface *zbuf = cell->framebuffer.zsbuf; + STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); + struct cell_command_framebuffer *fb + = cell_batch_alloc16(cell, sizeof(*fb)); + fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; + fb->color_start = cell->cbuf_map[0]; + fb->color_format = cbuf->format; + fb->depth_start = cell->zsbuf_map; + fb->depth_format = zbuf ? 
zbuf->format : PIPE_FORMAT_NONE; + fb->width = cell->framebuffer.width; + fb->height = cell->framebuffer.height; +#if 0 + printf("EMIT color format %s\n", pf_name(fb->color_format)); + printf("EMIT depth format %s\n", pf_name(fb->depth_format)); +#endif + } + + if (cell->dirty & (CELL_NEW_RASTERIZER)) { + STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); + struct cell_command_rasterizer *rast = + cell_batch_alloc16(cell, sizeof(*rast)); + rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; + rast->rasterizer = *cell->rasterizer; + } + + if (cell->dirty & (CELL_NEW_FS)) { + /* Send new fragment program to SPUs */ + STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0); + struct cell_command_fragment_program *fp + = cell_batch_alloc16(cell, sizeof(*fp)); + fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; + fp->num_inst = cell->fs->code.num_inst; + memcpy(&fp->code, cell->fs->code.store, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); + if (0) { + int i; + printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); + for (i = 0; i < fp->num_inst; i++) { + printf(" %3d: 0x%08x\n", i, fp->code[i]); + } + } + } + + if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { + const uint shader = PIPE_SHADER_FRAGMENT; + const uint num_const = cell->constants[shader].size / sizeof(float); + uint i, j; + float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); + uint32_t *ibuf = (uint32_t *) buf; + const float *constants = pipe_buffer_map(cell->pipe.screen, + cell->constants[shader].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; + ibuf[4] = num_const; + j = 8; + for (i = 0; i < num_const; i++) { + buf[j++] = constants[i]; + } + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + } + + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | + CELL_NEW_DEPTH_STENCIL | + CELL_NEW_BLEND)) { + struct cell_command_fragment_ops *fops, *fops_cmd; + /* Note that cell_command_fragment_ops is a variant-sized record */ + fops = lookup_fragment_ops(cell); + fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size)); + memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); + } + + if (cell->dirty & CELL_NEW_SAMPLER) { + uint i; + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + if (cell->dirty_samplers & (1 << i)) { + if (cell->sampler[i]) { + STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); + struct cell_command_sampler *sampler + = cell_batch_alloc16(cell, sizeof(*sampler)); + sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; + sampler->unit = i; + sampler->state = *cell->sampler[i]; + } + } + } + cell->dirty_samplers = 0x0; + } + + if (cell->dirty & CELL_NEW_TEXTURE) { + uint i; + for (i = 0;i < CELL_MAX_SAMPLERS; i++) { + if (cell->dirty_textures & (1 << i)) { + STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); + struct cell_command_texture *texture + = (struct cell_command_texture *)cell_batch_alloc16(cell, sizeof(*texture)); + texture->opcode[0] = CELL_CMD_STATE_TEXTURE; + texture->unit = i; + if (cell->texture[i]) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = cell->texture[i]->tiled_mapped[level]; + texture->width[level] = cell->texture[i]->base.width[level]; + texture->height[level] = cell->texture[i]->base.height[level]; + texture->depth[level] = cell->texture[i]->base.depth[level]; + } + texture->target = cell->texture[i]->base.target; + } + else { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { 
+ texture->start[level] = NULL; + texture->width[level] = 0; + texture->height[level] = 0; + texture->depth[level] = 0; + } + texture->target = 0; + } + } + } + cell->dirty_textures = 0x0; + } + + if (cell->dirty & CELL_NEW_VERTEX_INFO) { + emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, + &cell->vertex_info, sizeof(struct vertex_info)); + } + +#if 0 + if (cell->dirty & CELL_NEW_VS) { + const struct draw_context *const draw = cell->draw; + struct cell_shader_info info; + + info.num_outputs = draw_num_vs_outputs(draw); + info.declarations = (uintptr_t) draw->vs.machine.Declarations; + info.num_declarations = draw->vs.machine.NumDeclarations; + info.instructions = (uintptr_t) draw->vs.machine.Instructions; + info.num_instructions = draw->vs.machine.NumInstructions; + info.immediates = (uintptr_t) draw->vs.machine.Imms; + info.num_immediates = draw->vs.machine.ImmLimit / 4; + + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); + } +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h new file mode 100644 index 0000000000..59f8affe8d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_STATE_EMIT_H +#define CELL_STATE_EMIT_H + + +extern void +cell_emit_state(struct cell_context *cell); + + +#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c new file mode 100644 index 0000000000..78cb446c14 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -0,0 +1,1430 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Generate code to perform all per-fragment operations. + * + * Code generated by these functions performs alpha, depth, and stencil + * testing as well as alpha blending. + * + * \note + * Occlusion query is not supported, but this is the right place to add that + * support. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "cell_context.h" + +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Generate code to perform alpha testing. + * + * The code generated by this function uses the register specified by + * \c mask as both an input and an output. + * + * \param dsa Current alpha-test state + * \param f Function to which code should be appended + * \param mask Index of register containing active fragment mask + * \param alphas Index of register containing per-fragment alpha values + * + * \note Emits a maximum of 6 instructions. + */ +static void +emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int alphas) +{ + /* If the alpha function is either NEVER or ALWAYS, there is no need to + * load the reference value into a register. ALWAYS is a fairly common + * case, and this optimization saves 2 instructions.
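As an aside to the alpha-test generator: the reference value is a float, but it is loaded as a plain 32-bit immediate. A minimal scalar sketch (illustration only, not driver code; the constant 0.5f merely stands in for dsa->alpha.ref) of the bit-splitting that the il/ilh pair in the code below performs:

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      union { float f; uint32_t u; } ref_val;

      ref_val.f = 0.5f;                       /* stands in for dsa->alpha.ref */

      uint32_t lo = ref_val.u & 0x0000ffff;   /* low halfword, loaded first   */
      uint32_t hi = ref_val.u >> 16;          /* high halfword, merged in     */

      printf("bits 0x%08x -> hi 0x%04x, lo 0x%04x\n",
             (unsigned) ref_val.u, (unsigned) hi, (unsigned) lo);
      return 0;
   }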
+ */ + if (dsa->alpha.enabled + && (dsa->alpha.func != PIPE_FUNC_NEVER) + && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + int ref = spe_allocate_available_register(f); + int tmp_a = spe_allocate_available_register(f); + int tmp_b = spe_allocate_available_register(f); + union { + float f; + unsigned u; + } ref_val; + boolean complement = FALSE; + + ref_val.f = dsa->alpha.ref; + + spe_il(f, ref, ref_val.u & 0x0000ffff); + spe_ilh(f, ref, ref_val.u >> 16); + + switch (dsa->alpha.func) { + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_EQUAL: + spe_fceq(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GREATER: + spe_fcgt(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GEQUAL: + spe_fcgt(f, tmp_a, ref, alphas); + spe_fceq(f, tmp_b, ref, alphas); + spe_or(f, tmp_a, tmp_b, tmp_a); + break; + + case PIPE_FUNC_ALWAYS: + case PIPE_FUNC_NEVER: + default: + assert(0); + break; + } + + if (complement) { + spe_andc(f, mask, mask, tmp_a); + } else { + spe_and(f, mask, mask, tmp_a); + } + + spe_release_register(f, ref); + spe_release_register(f, tmp_a); + spe_release_register(f, tmp_b); + } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { + spe_il(f, mask, 0); + } +} + + +/** + * Generate code to perform Z testing. Four Z values are tested at once. + * \param dsa Current depth-test state + * \param f Function to which code should be appended + * \param mask Index of register to contain depth-pass mask + * \param stored Index of register containing values from depth buffer + * \param calculated Index of register containing per-fragment depth values + * + * \return + * If the calculated depth comparison mask is the actual mask, \c FALSE is + * returned. If the calculated depth comparison mask is the compliment of + * the actual mask, \c TRUE is returned. + * + * \note Emits a maximum of 3 instructions. + */ +static boolean +emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int stored, int calculated) +{ + unsigned func = (dsa->depth.enabled) + ? dsa->depth.func : PIPE_FUNC_ALWAYS; + int tmp = spe_allocate_available_register(f); + boolean compliment = FALSE; + + switch (func) { + case PIPE_FUNC_NEVER: + spe_il(f, mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceq(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgt(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LESS: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgt(f, mask, calculated, stored); + spe_ceq(f, tmp, calculated, stored); + spe_or(f, mask, mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + spe_il(f, mask, ~0); + break; + + default: + assert(0); + break; + } + + spe_release_register(f, tmp); + return compliment; +} + + +/** + * Generate code to apply the stencil operation (after testing). + * \note Emits a maximum of 5 instructions. + * + * \warning + * Since \c out and \c in might be the same register, this routine cannot + * generate code that uses \c out as a temporary. 
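One pattern worth calling out from emit_alpha_test() and emit_depth_test() above: only EQ/GT-style compares are ever emitted, and LESS, LEQUAL and NOTEQUAL simply reuse those results with the sense flipped when the comparison is folded into the fragment mask. A scalar sketch of that folding step (illustration only, not driver code):

   #include <stdbool.h>
   #include <stdint.h>

   static uint32_t
   fold_compare_into_mask(uint32_t mask, uint32_t cmp_result, bool complement)
   {
      /* complement == false:  mask &=  cmp_result   (spe_and)
       * complement == true:   mask &= ~cmp_result   (spe_andc)  */
      return complement ? (mask & ~cmp_result) : (mask & cmp_result);
   }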
+ */ +static void +emit_stencil_op(struct spe_function *f, + int out, int in, int mask, unsigned op, unsigned ref) +{ + const int clamp = spe_allocate_available_register(f); + const int clamp_mask = spe_allocate_available_register(f); + const int result = spe_allocate_available_register(f); + + switch(op) { + case PIPE_STENCIL_OP_KEEP: + assert(0); + case PIPE_STENCIL_OP_ZERO: + spe_il(f, result, 0); + break; + case PIPE_STENCIL_OP_REPLACE: + spe_il(f, result, ref); + break; + case PIPE_STENCIL_OP_INCR: + /* clamp = [0xff, 0xff, 0xff, 0xff] */ + spe_il(f, clamp, 0x0ff); + /* result[i] = in[i] + 1 */ + spe_ai(f, result, in, 1); + /* clamp_mask[i] = (result[i] > 0xff) */ + spe_clgti(f, clamp_mask, result, 0x0ff); + /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ + spe_selb(f, result, result, clamp, clamp_mask); + break; + case PIPE_STENCIL_OP_DECR: + spe_il(f, clamp, 0); + spe_ai(f, result, in, -1); + + /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned + * arithmetic. + */ + spe_clgti(f, clamp_mask, result, 0x0ff); + spe_selb(f, result, result, clamp, clamp_mask); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + spe_ai(f, result, in, 1); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + spe_ai(f, result, in, -1); + break; + case PIPE_STENCIL_OP_INVERT: + spe_nor(f, result, in, in); + break; + default: + assert(0); + } + + spe_selb(f, out, in, result, mask); + + spe_release_register(f, result); + spe_release_register(f, clamp_mask); + spe_release_register(f, clamp); +} + + +/** + * Generate code to do stencil test. Four pixels are tested at once. + * \param dsa Depth / stencil test state + * \param face 0 for front face, 1 for back face + * \param f Function to append instructions to + * \param mask Register containing mask of fragments passing the + * alpha test + * \param depth_mask Register containing mask of fragments passing the + * depth test + * \param depth_compliment Is \c depth_mask the compliment of the actual mask? + * \param stencil Register containing values from stencil buffer + * \param depth_pass Register to store mask of fragments passing stencil test + * and depth test + * + * \note + * Emits a maximum of 10 + (3 * 5) = 25 instructions. 
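The INCR/DECR cases in emit_stencil_op() above build a clamped add out of ai + clgti + selb. Scalar equivalents (illustration only, not driver code), showing how one unsigned compare against 0xff serves as the overflow check for the increment and, thanks to wrap-around, as the underflow check for the decrement:

   #include <stdint.h>

   static uint32_t stencil_incr_clamp(uint32_t s)
   {
      uint32_t r = s + 1;
      return (r > 0xff) ? 0xff : r;      /* clamp to the maximum value */
   }

   static uint32_t stencil_decr_clamp(uint32_t s)
   {
      uint32_t r = s - 1;                /* 0 - 1 wraps to 0xffffffff  */
      return (r > 0xff) ? 0 : r;         /* clamp to the minimum value */
   }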
+ */ +static int +emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, + unsigned face, + struct spe_function *f, + int mask, + int depth_mask, + boolean depth_complement, + int stencil, + int depth_pass) +{ + int stencil_fail = spe_allocate_available_register(f); + int depth_fail = spe_allocate_available_register(f); + int stencil_mask = spe_allocate_available_register(f); + int stencil_pass = spe_allocate_available_register(f); + int face_stencil = spe_allocate_available_register(f); + int stencil_src = stencil; + const unsigned ref = (dsa->stencil[face].ref_value + & dsa->stencil[face].value_mask); + boolean complement = FALSE; + int stored; + int tmp = spe_allocate_available_register(f); + + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].value_mask != 0x0ff)) { + stored = spe_allocate_available_register(f); + spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + } else { + stored = stencil; + } + + + switch (dsa->stencil[face].func) { + case PIPE_FUNC_NEVER: + spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ + break; + + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + /* stencil_mask[i] = (stored[i] == ref) */ + spe_ceqi(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + complement = TRUE; + /* stencil_mask[i] = (stored[i] > ref) */ + spe_clgti(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + /* stencil_mask[i] = (stored[i] > ref) */ + spe_clgti(f, stencil_mask, stored, ref); + /* tmp[i] = (stored[i] == ref) */ + spe_ceqi(f, tmp, stored, ref); + /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ + spe_or(f, stencil_mask, stencil_mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + /* See comment below. */ + break; + + default: + assert(0); + break; + } + + if (stored != stencil) { + spe_release_register(f, stored); + } + spe_release_register(f, tmp); + + + /* ALWAYS is a very common stencil-test, so some effort is applied to + * optimize that case. The stencil-pass mask is the same as the input + * fragment mask. This makes the stencil-test (above) a no-op, and the + * input fragment mask can be "renamed" the stencil-pass mask. + */ + if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { + spe_release_register(f, stencil_pass); + stencil_pass = mask; + } else { + if (complement) { + spe_andc(f, stencil_pass, mask, stencil_mask); + } else { + spe_and(f, stencil_pass, mask, stencil_mask); + } + } + + if (depth_complement) { + spe_andc(f, depth_pass, stencil_pass, depth_mask); + } else { + spe_and(f, depth_pass, stencil_pass, depth_mask); + } + + + /* Conditionally emit code to update the stencil value under various + * condititons. Note that there is no need to generate code under the + * following circumstances: + * + * - Stencil write mask is zero. 
+ * - For stencil-fail if the stencil test is ALWAYS + * - For depth-fail if the stencil test is NEVER + * - For depth-pass if the stencil test is NEVER + * - Any of the 3 conditions if the operation is KEEP + */ + if (dsa->stencil[face].write_mask != 0) { + if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { + if (complement) { + spe_and(f, stencil_fail, mask, stencil_mask); + } else { + spe_andc(f, stencil_fail, mask, stencil_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, + dsa->stencil[face].fail_op, + dsa->stencil[face].ref_value); + + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { + if (depth_complement) { + spe_and(f, depth_fail, stencil_pass, depth_mask); + } else { + spe_andc(f, depth_fail, stencil_pass, depth_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, depth_fail, + dsa->stencil[face].zfail_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { + emit_stencil_op(f, face_stencil, stencil_src, depth_pass, + dsa->stencil[face].zpass_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + } + + spe_release_register(f, stencil_fail); + spe_release_register(f, depth_fail); + spe_release_register(f, stencil_mask); + if (stencil_pass != mask) { + spe_release_register(f, stencil_pass); + } + + /* If all of the stencil operations were KEEP or the stencil write mask was + * zero, "stencil_src" will still be set to "stencil". In this case + * release the "face_stencil" register. Otherwise apply the stencil write + * mask to select bits from the calculated stencil value and the previous + * stencil value. + */ + if (stencil_src == stencil) { + spe_release_register(f, face_stencil); + } else if (dsa->stencil[face].write_mask != 0x0ff) { + int tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, dsa->stencil[face].write_mask); + spe_selb(f, stencil_src, stencil, stencil_src, tmp); + + spe_release_register(f, tmp); + } + + return stencil_src; +} + + +void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) +{ + struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; + struct spe_function *const f = &cdsa->code; + + /* This code generates a maximum of 6 (alpha test) + 3 (depth test) + * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round + * up to 64 to make it a happy power-of-two. + */ + spe_init_func(f, SPE_INST_SIZE * 64); + + + /* Allocate registers for the function's input parameters. Cleverly (and + * clever code is usually dangerous, but I couldn't resist) the generated + * function returns a structure. Returned structures start with register + * 3, and the structure fields are ordered to match up exactly with the + * input parameters. 
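To make the register convention described above concrete, here is a purely hypothetical SPU-side view of the generated function's result (the struct, field and typedef names are invented for illustration; the real declarations live in the SPU-side sources):

   typedef unsigned int qword_t[4];   /* stands in for one 16-byte SPU register */

   struct depth_stencil_result {
      qword_t mask;      /* in: fragment mask (reg 3),   out: surviving fragments */
      qword_t depth;     /* in: Z-buffer values (reg 4), out: updated Z values    */
      qword_t stencil;   /* in: stencil values (reg 5),  out: updated stencil     */
   };

Because returned aggregates start in register 3 and the fields mirror the parameter order, writing the results simply overwrites the corresponding input registers.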
+ */ + int mask = spe_allocate_register(f, 3); + int depth = spe_allocate_register(f, 4); + int stencil = spe_allocate_register(f, 5); + int zvals = spe_allocate_register(f, 6); + int frag_a = spe_allocate_register(f, 7); + int facing = spe_allocate_register(f, 8); + + int depth_mask = spe_allocate_available_register(f); + + boolean depth_complement; + + + emit_alpha_test(dsa, f, mask, frag_a); + + depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); + + if (dsa->stencil[0].enabled) { + const int front_depth_pass = spe_allocate_available_register(f); + int front_stencil = emit_stencil_test(dsa, 0, f, mask, + depth_mask, depth_complement, + stencil, front_depth_pass); + + if (dsa->stencil[1].enabled) { + const int back_depth_pass = spe_allocate_available_register(f); + int back_stencil = emit_stencil_test(dsa, 1, f, mask, + depth_mask, depth_complement, + stencil, back_depth_pass); + + /* If the front facing stencil value and the back facing stencil + * value are stored in the same register, there is no need to select + * a value based on the facing. This can happen if the stencil value + * was not modified due to the write masks being zero, the stencil + * operations being KEEP, etc. + */ + if (front_stencil != back_stencil) { + spe_selb(f, stencil, back_stencil, front_stencil, facing); + } + + if (back_stencil != stencil) { + spe_release_register(f, back_stencil); + } + + if (front_stencil != stencil) { + spe_release_register(f, front_stencil); + } + + spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); + + spe_release_register(f, back_depth_pass); + } else { + if (front_stencil != stencil) { + spe_or(f, stencil, front_stencil, front_stencil); + spe_release_register(f, front_stencil); + } + spe_or(f, mask, front_depth_pass, front_depth_pass); + } + + spe_release_register(f, front_depth_pass); + } else if (dsa->depth.enabled) { + if (depth_complement) { + spe_andc(f, mask, mask, depth_mask); + } else { + spe_and(f, mask, mask, depth_mask); + } + } + + if (dsa->depth.writemask) { + spe_selb(f, depth, depth, zvals, mask); + } + + spe_bi(f, 0, 0, 0); /* return from function call */ + + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# alpha (%sabled)\n", + (dsa->alpha.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->alpha.func); + printf("# ref: %.2f\n", dsa->alpha.ref); + + printf("# depth (%sabled)\n", + (dsa->depth.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->depth.func); + + for (i = 0; i < 2; i++) { + printf("# %s stencil (%sabled)\n", + (i == 0) ? "front" : "back", + (dsa->stencil[i].enabled) ? 
"en" : "dis"); + + printf("# func: %u\n", dsa->stencil[i].func); + printf("# op (sf, zf, zp): %u %u %u\n", + dsa->stencil[i].fail_op, + dsa->stencil[i].zfail_op, + dsa->stencil[i].zpass_op); + printf("# ref value / value mask / write mask: %02x %02x %02x\n", + dsa->stencil[i].ref_value, + dsa->stencil[i].value_mask, + dsa->stencil[i].write_mask); + } + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif +} + + +/** + * \note Emits a maximum of 3 instructions + */ +static int +emit_alpha_factor_calculation(struct spe_function *f, + unsigned factor, + int src_alpha, int dst_alpha, int const_alpha) +{ + int factor_reg; + int tmp; + + + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_or(f, factor_reg, src_alpha, src_alpha); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor_reg = dst_alpha; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + factor_reg = spe_allocate_available_register(f); + + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, const_alpha); + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor_reg = const_alpha; + break; + + case PIPE_BLENDFACTOR_ZERO: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, src_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, dst_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + factor_reg = -1; + break; + } + + return factor_reg; +} + + +/** + * \note Emits a maximum of 6 instructions + */ +static void +emit_color_factor_calculation(struct spe_function *f, + unsigned sF, unsigned mask, + const int *src, + const int *dst, + const int *const_color, + int *factor) +{ + int tmp; + unsigned i; + + + factor[0] = -1; + factor[1] = -1; + factor[2] = -1; + factor[3] = -1; + + switch (sF) { + case PIPE_BLENDFACTOR_ONE: + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_or(f, factor[i], src[i], src[i]); + } + } + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_or(f, factor[0], src[3], src[3]); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor[0] = dst[3]; + factor[1] = dst[3]; + factor[2] = dst[3]; + break; + + case PIPE_BLENDFACTOR_DST_COLOR: + factor[0] = dst[0]; + factor[1] = dst[1]; + factor[2] = dst[2]; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + /* Alpha saturate means min(As, 1-Ad). 
+ */ + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, tmp, tmp, dst[3]); + spe_fcgt(f, factor[0], tmp, src[3]); + spe_selb(f, factor[0], src[3], tmp, factor[0]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; i++) { + factor[i] = spe_allocate_available_register(f); + + spe_fs(f, factor[i], tmp, const_color[i]); + } + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_COLOR: + for (i = 0; i < 3; i++) { + factor[i] = const_color[i]; + } + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, const_color[3]); + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor[0] = const_color[3]; + factor[1] = factor[0]; + factor[2] = factor[0]; + break; + + case PIPE_BLENDFACTOR_ZERO: + break; + + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, src[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, src[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, dst[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, dst[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + } +} + + +static void +emit_blend_calculation(struct spe_function *f, + unsigned func, unsigned sF, unsigned dF, + int src, int src_factor, int dst, int dst_factor) +{ + int tmp = spe_allocate_available_register(f); + + switch (func) { + case PIPE_BLEND_ADD: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. 
*/ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fa(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } else { + spe_fm(f, src, dst, dst_factor); + } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fma(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, dst); + } else { + spe_fm(f, src, dst, dst_factor); + } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fms(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_REVERSE_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, src); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, dst, src); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } else { + spe_fm(f, src, dst, dst_factor); + } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); + } else { + spe_fm(f, tmp, src, src_factor); + spe_fms(f, src, src, dst_factor, tmp); + } + break; + + case PIPE_BLEND_MIN: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, src, dst, tmp); + break; + + case PIPE_BLEND_MAX: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, dst, src, tmp); + break; + + default: + assert(0); + } + + spe_release_register(f, tmp); +} + + +/** + * Generate code to perform alpha blending on the SPE + */ +void +cell_generate_alpha_blend(struct cell_blend_state *cb) +{ + struct pipe_blend_state *const b = &cb->base; + struct spe_function *const f = &cb->code; + + /* This code generates a maximum of 3 (source alpha factor) + * + 3 (destination alpha factor) + (3 * 6) (source color factor) + * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) + * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to + * make it a happy power-of-two. + */ + spe_init_func(f, SPE_INST_SIZE * 64); + + + const int frag[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + const int pixel[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + const int const_color[4] = { + spe_allocate_register(f, 11), + spe_allocate_register(f, 12), + spe_allocate_register(f, 13), + spe_allocate_register(f, 14), + }; + unsigned func[4]; + unsigned sF[4]; + unsigned dF[4]; + unsigned i; + int src_factor[4]; + int dst_factor[4]; + + + /* Does the selected blend mode make use of the source / destination + * color (RGB) blend factors? + */ + boolean need_color_factor = b->blend_enable + && (b->rgb_func != PIPE_BLEND_MIN) + && (b->rgb_func != PIPE_BLEND_MAX); + + /* Does the selected blend mode make use of the source / destination + * alpha blend factors? 
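For reference, a scalar sketch (illustration only, not driver code) of the arithmetic that emit_blend_calculation() above strength-reduces: the ONE/ZERO factor special cases are algebraic simplifications of the ADD form, while MIN and MAX ignore the factors entirely, which is why they are excluded from the factor checks here.

   #include <math.h>

   static float
   blend_add(float src, float src_factor, float dst, float dst_factor)
   {
      return src * src_factor + dst * dst_factor;   /* fm + fma on the SPE */
   }

   static float blend_min(float src, float dst) { return fminf(src, dst); }
   static float blend_max(float src, float dst) { return fmaxf(src, dst); }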
+ */ + boolean need_alpha_factor = b->blend_enable + && (b->alpha_func != PIPE_BLEND_MIN) + && (b->alpha_func != PIPE_BLEND_MAX); + + + if (b->blend_enable) { + sF[0] = b->rgb_src_factor; + sF[1] = sF[0]; + sF[2] = sF[0]; + switch (b->alpha_src_factor & 0x0f) { + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + sF[3] = PIPE_BLENDFACTOR_ONE; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + sF[3] = b->alpha_src_factor + 1; + break; + default: + sF[3] = b->alpha_src_factor; + } + + dF[0] = b->rgb_dst_factor; + dF[1] = dF[0]; + dF[2] = dF[0]; + switch (b->alpha_dst_factor & 0x0f) { + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + dF[3] = b->alpha_dst_factor + 1; + break; + default: + dF[3] = b->alpha_dst_factor; + } + + func[0] = b->rgb_func; + func[1] = func[0]; + func[2] = func[0]; + func[3] = b->alpha_func; + } else { + sF[0] = PIPE_BLENDFACTOR_ONE; + sF[1] = PIPE_BLENDFACTOR_ONE; + sF[2] = PIPE_BLENDFACTOR_ONE; + sF[3] = PIPE_BLENDFACTOR_ONE; + dF[0] = PIPE_BLENDFACTOR_ZERO; + dF[1] = PIPE_BLENDFACTOR_ZERO; + dF[2] = PIPE_BLENDFACTOR_ZERO; + dF[3] = PIPE_BLENDFACTOR_ZERO; + + func[0] = PIPE_BLEND_ADD; + func[1] = PIPE_BLEND_ADD; + func[2] = PIPE_BLEND_ADD; + func[3] = PIPE_BLEND_ADD; + } + + + /* If alpha writing is enabled and the alpha blend mode requires use of + * the alpha factor, calculate the alpha factor. + */ + if (((b->colormask & 8) != 0) && need_alpha_factor) { + src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], + frag[3], pixel[3]); + + /* If the alpha destination blend factor is the same as the alpha source + * blend factor, re-use the previously calculated value. + */ + dst_factor[3] = (dF[3] == sF[3]) + ? src_factor[3] + : emit_alpha_factor_calculation(f, dF[3], const_color[3], + frag[3], pixel[3]); + } + + + if (sF[0] == sF[3]) { + src_factor[0] = src_factor[3]; + src_factor[1] = src_factor[3]; + src_factor[2] = src_factor[3]; + } else if (sF[0] == dF[3]) { + src_factor[0] = dst_factor[3]; + src_factor[1] = dst_factor[3]; + src_factor[2] = dst_factor[3]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_src_factor, + b->colormask, + frag, pixel, const_color, src_factor); + } + + + if (dF[0] == sF[3]) { + dst_factor[0] = src_factor[3]; + dst_factor[1] = src_factor[3]; + dst_factor[2] = src_factor[3]; + } else if (dF[0] == dF[3]) { + dst_factor[0] = dst_factor[3]; + dst_factor[1] = dst_factor[3]; + dst_factor[2] = dst_factor[3]; + } else if (dF[0] == sF[0]) { + dst_factor[0] = src_factor[0]; + dst_factor[1] = src_factor[1]; + dst_factor[2] = src_factor[2]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_dst_factor, + b->colormask, + frag, pixel, const_color, dst_factor); + } + + + + for (i = 0; i < 4; ++i) { + if ((b->colormask & (1U << i)) != 0) { + emit_blend_calculation(f, + func[i], sF[i], dF[i], + frag[i], src_factor[i], + pixel[i], dst_factor[i]); + } + } + + spe_bi(f, 0, 0, 0); + +#if 0 + { + const uint32_t *p = f->store; + + printf("# %u instructions\n", f->csr - f->store); + printf("# blend (%sabled)\n", + (cb->base.blend_enable) ? 
"en" : "dis"); + printf("# RGB func / sf / df: %u %u %u\n", + cb->base.rgb_func, + cb->base.rgb_src_factor, + cb->base.rgb_dst_factor); + printf("# ALP func / sf / df: %u %u %u\n", + cb->base.alpha_func, + cb->base.alpha_src_factor, + cb->base.alpha_dst_factor); + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif +} + + +static int +PC_OFFSET(const struct spe_function *f, const void *d) +{ + const intptr_t pc = (intptr_t) &f->store[f->num_inst]; + const intptr_t ea = ~0x0f & (intptr_t) d; + + return (ea - pc) >> 2; +} + + +/** + * Generate code to perform color conversion and logic op + * + * \bug + * The code generated by this function should also perform dithering. + * + * \bug + * The code generated by this function should also perform color-write + * masking. + * + * \bug + * Only two framebuffer formats are supported at this time. + */ +void +cell_generate_logic_op(struct spe_function *f, + const struct pipe_blend_state *blend, + struct pipe_surface *surf) +{ + const unsigned logic_op = (blend->logicop_enable) + ? blend->logicop_func : PIPE_LOGICOP_COPY; + + /* This code generates a maximum of 37 instructions. An additional 32 + * bytes (equiv. to 8 instructions) are needed for data storage. Round up + * to 64 to make it a happy power-of-two. + */ + spe_init_func(f, SPE_INST_SIZE * 64); + + + /* Pixel colors in framebuffer format in AoS layout. + */ + const int pixel[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + + /* Fragment colors stored as floats in SoA layout. + */ + const int frag[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + + const int mask = spe_allocate_register(f, 11); + + + /* Short-circuit the noop and invert cases. + */ + if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) { + spe_bi(f, 0, 0, 0); + return; + } else if (logic_op == PIPE_LOGICOP_INVERT) { + spe_nor(f, pixel[0], pixel[0], pixel[0]); + spe_nor(f, pixel[1], pixel[1], pixel[1]); + spe_nor(f, pixel[2], pixel[2], pixel[2]); + spe_nor(f, pixel[3], pixel[3], pixel[3]); + spe_bi(f, 0, 0, 0); + return; + } + + + const int tmp[4] = { + spe_allocate_available_register(f), + spe_allocate_available_register(f), + spe_allocate_available_register(f), + spe_allocate_available_register(f), + }; + + const int shuf_xpose_hi = spe_allocate_available_register(f); + const int shuf_xpose_lo = spe_allocate_available_register(f); + const int shuf_color = spe_allocate_available_register(f); + + + /* Pointer to the begining of the function's private data area. + */ + uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); + + + /* Convert fragment colors to framebuffer format in AoS layout. 
+ */ + switch (surf->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + data[0] = 0x00010203; + data[1] = 0x10111213; + data[2] = 0x04050607; + data[3] = 0x14151617; + data[4] = 0x0c000408; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + data[0] = 0x03020100; + data[1] = 0x13121110; + data[2] = 0x07060504; + data[3] = 0x17161514; + data[4] = 0x0804000c; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + break; + default: + fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); + ASSERT(0); + } + + spe_ilh(f, tmp[0], 0x0808); + spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); + spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); + spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); + + spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); + spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); + spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); + spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); + + spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); + spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); + spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); + spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); + + spe_cfltu(f, frag[0], frag[0], 32); + spe_cfltu(f, frag[1], frag[1], 32); + spe_cfltu(f, frag[2], frag[2], 32); + spe_cfltu(f, frag[3], frag[3], 32); + + spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); + spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); + spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); + spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); + + + /* If logic op is enabled, perform the requested logical operation on the + * converted fragment colors and the pixel colors. + */ + switch (logic_op) { + case PIPE_LOGICOP_CLEAR: + spe_il(f, frag[0], 0); + spe_il(f, frag[1], 0); + spe_il(f, frag[2], 0); + spe_il(f, frag[3], 0); + break; + case PIPE_LOGICOP_NOR: + spe_nor(f, frag[0], frag[0], pixel[0]); + spe_nor(f, frag[1], frag[1], pixel[1]); + spe_nor(f, frag[2], frag[2], pixel[2]); + spe_nor(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_AND_INVERTED: + spe_andc(f, frag[0], pixel[0], frag[0]); + spe_andc(f, frag[1], pixel[1], frag[1]); + spe_andc(f, frag[2], pixel[2], frag[2]); + spe_andc(f, frag[3], pixel[3], frag[3]); + break; + case PIPE_LOGICOP_COPY_INVERTED: + spe_nor(f, frag[0], frag[0], frag[0]); + spe_nor(f, frag[1], frag[1], frag[1]); + spe_nor(f, frag[2], frag[2], frag[2]); + spe_nor(f, frag[3], frag[3], frag[3]); + break; + case PIPE_LOGICOP_AND_REVERSE: + spe_andc(f, frag[0], frag[0], pixel[0]); + spe_andc(f, frag[1], frag[1], pixel[1]); + spe_andc(f, frag[2], frag[2], pixel[2]); + spe_andc(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_XOR: + spe_xor(f, frag[0], frag[0], pixel[0]); + spe_xor(f, frag[1], frag[1], pixel[1]); + spe_xor(f, frag[2], frag[2], pixel[2]); + spe_xor(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_NAND: + spe_nand(f, frag[0], frag[0], pixel[0]); + spe_nand(f, frag[1], frag[1], pixel[1]); + spe_nand(f, frag[2], frag[2], pixel[2]); + spe_nand(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_AND: + spe_and(f, frag[0], frag[0], pixel[0]); + spe_and(f, frag[1], frag[1], pixel[1]); + spe_and(f, frag[2], frag[2], pixel[2]); + spe_and(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_EQUIV: + spe_eqv(f, frag[0], frag[0], pixel[0]); + spe_eqv(f, frag[1], frag[1], pixel[1]); + spe_eqv(f, frag[2], frag[2], pixel[2]); + spe_eqv(f, frag[3], frag[3], 
pixel[3]); + break; + case PIPE_LOGICOP_OR_INVERTED: + spe_orc(f, frag[0], pixel[0], frag[0]); + spe_orc(f, frag[1], pixel[1], frag[1]); + spe_orc(f, frag[2], pixel[2], frag[2]); + spe_orc(f, frag[3], pixel[3], frag[3]); + break; + case PIPE_LOGICOP_COPY: + break; + case PIPE_LOGICOP_OR_REVERSE: + spe_orc(f, frag[0], frag[0], pixel[0]); + spe_orc(f, frag[1], frag[1], pixel[1]); + spe_orc(f, frag[2], frag[2], pixel[2]); + spe_orc(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_OR: + spe_or(f, frag[0], frag[0], pixel[0]); + spe_or(f, frag[1], frag[1], pixel[1]); + spe_or(f, frag[2], frag[2], pixel[2]); + spe_or(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_SET: + spe_il(f, frag[0], ~0); + spe_il(f, frag[1], ~0); + spe_il(f, frag[2], ~0); + spe_il(f, frag[3], ~0); + break; + + /* These two cases are short-circuited above. + */ + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_NOOP: + default: + assert(0); + } + + + /* Apply fragment mask. + */ + spe_ilh(f, tmp[0], 0x0000); + spe_ilh(f, tmp[1], 0x0404); + spe_ilh(f, tmp[2], 0x0808); + spe_ilh(f, tmp[3], 0x0c0c); + + spe_shufb(f, tmp[0], mask, mask, tmp[0]); + spe_shufb(f, tmp[1], mask, mask, tmp[1]); + spe_shufb(f, tmp[2], mask, mask, tmp[2]); + spe_shufb(f, tmp[3], mask, mask, tmp[3]); + + spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); + spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); + spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); + spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); + + spe_bi(f, 0, 0, 0); + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# %u instructions\n", f->csr - f->store); + + printf("\t.text\n"); + for (i = 0; i < 64; i++) { + printf("\t.long\t0x%04x\n", p[i]); + } + fflush(stdout); + } +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h new file mode 100644 index 0000000000..a8267a5133 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -0,0 +1,39 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef CELL_STATE_PER_FRAGMENT_H +#define CELL_STATE_PER_FRAGMENT_H + +extern void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); + +extern void +cell_generate_alpha_blend(struct cell_blend_state *cb); + +extern void +cell_generate_logic_op(struct spe_function *f, + const struct pipe_blend_state *blend, + struct pipe_surface *surf); + +#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c new file mode 100644 index 0000000000..cda39f8d59 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -0,0 +1,221 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_parse.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_gen_fp.h" + + +/** cast wrapper */ +static INLINE struct cell_fragment_shader_state * +cell_fragment_shader_state(void *shader) +{ + return (struct cell_fragment_shader_state *) shader; +} + + +/** cast wrapper */ +static INLINE struct cell_vertex_shader_state * +cell_vertex_shader_state(void *shader) +{ + return (struct cell_vertex_shader_state *) shader; +} + + +/** + * Create fragment shader state. 
+ * Called via pipe->create_fs_state() + */ +static void * +cell_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_fragment_shader_state *cfs; + + cfs = CALLOC_STRUCT(cell_fragment_shader_state); + if (!cfs) + return NULL; + + cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cfs->shader.tokens) { + FREE(cfs); + return NULL; + } + + tgsi_scan_shader(templ->tokens, &cfs->info); + + cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); + + return cfs; +} + + +/** + * Called via pipe->bind_fs_state() + */ +static void +cell_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->fs = cell_fragment_shader_state(fs); + + cell->dirty |= CELL_NEW_FS; +} + + +/** + * Called via pipe->delete_fs_state() + */ +static void +cell_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); + + spe_release_func(&cfs->code); + + FREE((void *) cfs->shader.tokens); + FREE(cfs); +} + + +/** + * Create vertex shader state. + * Called via pipe->create_vs_state() + */ +static void * +cell_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *cvs; + + cvs = CALLOC_STRUCT(cell_vertex_shader_state); + if (!cvs) + return NULL; + + cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cvs->shader.tokens) { + FREE(cvs); + return NULL; + } + + tgsi_scan_shader(templ->tokens, &cvs->info); + + cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); + if (cvs->draw_data == NULL) { + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); + return NULL; + } + + return cvs; +} + + +/** + * Called via pipe->bind_vs_state() + */ +static void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->vs = cell_vertex_shader_state(vs); + + draw_bind_vertex_shader(cell->draw, + (cell->vs ? 
cell->vs->draw_data : NULL)); + + cell->dirty |= CELL_NEW_VS; +} + + +/** + * Called via pipe->delete_vs_state() + */ +static void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); + + draw_delete_vertex_shader(cell->draw, cvs->draw_data); + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); +} + + +/** + * Called via pipe->set_constant_buffer() + */ +static void +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + draw_flush(cell->draw); + + /* note: reference counting */ + winsys_buffer_reference(ws, + &cell->constants[shader].buffer, + buf->buffer); + cell->constants[shader].size = buf->size; + + if (shader == PIPE_SHADER_VERTEX) + cell->dirty |= CELL_NEW_VS_CONSTANTS; + else if (shader == PIPE_SHADER_FRAGMENT) + cell->dirty |= CELL_NEW_FS_CONSTANTS; +} + + +void +cell_init_shader_functions(struct cell_context *cell) +{ + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_constant_buffer = cell_set_constant_buffer; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c new file mode 100644 index 0000000000..fbe55c8472 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
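Illustrative usage (not from this commit) of the pipe_context hooks installed by cell_init_shader_functions() above; the calling code and the fs_templ/const_buf objects are assumptions made for the sketch.

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"

/* Hypothetical state-tracker side: create a fragment shader, bind it and
 * supply its constant buffer through the installed hooks.
 */
static void
example_bind_fragment_shader(struct pipe_context *pipe,
                             const struct pipe_shader_state *fs_templ,
                             const struct pipe_constant_buffer *const_buf)
{
   void *fs = pipe->create_fs_state(pipe, fs_templ);

   pipe->bind_fs_state(pipe, fs);
   pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, const_buf);
   /* ... draw ...; eventually pipe->delete_fs_state(pipe, fs) */
}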
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "cell_context.h" +#include "cell_state.h" + +#include "draw/draw_context.h" + + +static void +cell_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct cell_context *cell = cell_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); + cell->num_vertex_elements = count; + + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_elements(cell->draw, count, elements); +} + + +static void +cell_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct cell_context *cell = cell_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); + cell->num_vertex_buffers = count; + + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_buffers(cell->draw, count, buffers); +} + + +void +cell_init_vertex_functions(struct cell_context *cell) +{ + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c new file mode 100644 index 0000000000..c9203fee08 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_rect.h" +#include "cell_context.h" +#include "cell_surface.h" + + +void +cell_init_surface_functions(struct cell_context *cell) +{ + cell->pipe.surface_copy = util_surface_copy; + cell->pipe.surface_fill = util_surface_fill; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h new file mode 100644 index 0000000000..9e58f32944 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef CELL_SURFACE_H +#define CELL_SURFACE_H + + +struct cell_context; + + +extern void +cell_init_surface_functions(struct cell_context *cell); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c new file mode 100644 index 0000000000..9f83ab8fa4 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -0,0 +1,528 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + * Brian Paul + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + + +static unsigned +minify(unsigned d) +{ + return MAX2(1, d>>1); +} + + +static void +cell_texture_layout(struct cell_texture *ct) +{ + struct pipe_texture *pt = &ct->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + ct->buffer_size = 0; + + for ( level = 0 ; level <= pt->last_level ; level++ ) { + unsigned size; + unsigned w_tile, h_tile; + + assert(level < CELL_MAX_TEXTURE_LEVELS); + + /* width, height, rounded up to tile size */ + w_tile = align(width, TILE_SIZE); + h_tile = align(height, TILE_SIZE); + + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); + + ct->stride[level] = pt->nblocksx[level] * pt->block.size; + + ct->level_offset[level] = ct->buffer_size; + + size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + if (pt->target == PIPE_TEXTURE_CUBE) + size *= 6; + else + size *= depth; + + ct->buffer_size += size; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +static struct pipe_texture * +cell_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; + struct cell_texture *ct = CALLOC_STRUCT(cell_texture); + if (!ct) + return NULL; + + ct->base = *templat; + ct->base.refcount = 1; + ct->base.screen = screen; + + cell_texture_layout(ct); + + ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, + ct->buffer_size); + + if (!ct->buffer) { + FREE(ct); + return NULL; + } + + return &ct->base; +} + + +static void +cell_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + /* Delete this texture now. + * But note that the underlying pipe_buffer may linger... + */ + struct cell_texture *ct = cell_texture(*pt); + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); + */ + + pipe_buffer_reference(screen, &ct->buffer, NULL); + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + /* Unreference the tiled image buffer. + * It may not actually be deleted until a fence is hit. + */ + if (ct->tiled_buffer[i]) { + ct->tiled_mapped[i] = NULL; + winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL); + } + } + + FREE(ct); + } + *pt = NULL; +} + + + +/** + * Convert image from linear layout to tiled layout. 4-byte pixels. 
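A standalone sketch (not driver code) of the per-level arithmetic done by cell_texture_layout() above, assuming a plain 4-byte-per-pixel format so one block equals one pixel; the 100x70 base size is just an example.

#include <stdio.h>

#define EX_TILE_SIZE 32
#define EX_ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))
#define EX_MAX(a, b)   ((a) > (b) ? (a) : (b))

int
main(void)
{
   unsigned width = 100, height = 70, offset = 0, level;

   for (level = 0; level <= 2; level++) {
      /* dimensions rounded up to whole tiles, as in cell_texture_layout() */
      unsigned w_tile = EX_ALIGN(width, EX_TILE_SIZE);   /* e.g. 100 -> 128 */
      unsigned h_tile = EX_ALIGN(height, EX_TILE_SIZE);  /* e.g.  70 ->  96 */
      unsigned size = w_tile * h_tile * 4;               /* bytes, one layer */

      printf("level %u: %ux%u stored as %ux%u, offset %u, size %u\n",
             level, width, height, w_tile, h_tile, offset, size);

      offset += size;                   /* becomes the next level's offset */
      width  = EX_MAX(1u, width >> 1);  /* minify */
      height = EX_MAX(1u, height >> 1);
   }
   return 0;
}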
+ */ +static void +twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, + uint src_stride, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = (h + tile_size - 1) / tile_size; + const uint w_t = (w + tile_size - 1) / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + src_stride /= 4; /* convert from bytes to pixels */ + + /* loop over dest tiles */ + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* start of dest tile: */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + + /* compute size of this tile (may be smaller than tile_size) */ + /* XXX note: a compiler bug was found here. That's why the code + * looks as it does. + */ + uint tile_width = w - jt * tile_size; + tile_width = MIN2(tile_width, tile_size); + uint tile_height = h - it * tile_size; + tile_height = MIN2(tile_height, tile_size); + + /* loop over texels in the tile */ + for (i = 0; i < tile_height; i++) { + for (j = 0; j < tile_width; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + ASSERT(srci < h); + ASSERT(srcj < w); + tdst[i * tile_size + j] = src[srci * src_stride + srcj]; + } + } + } + } +} + + +/** + * For Cell. Basically, rearrange the pixels/quads from this layout: + * +--+--+--+--+ + * |p0|p1|p2|p3|.... + * +--+--+--+--+ + * + * to this layout: + * +--+--+ + * |p0|p1|.... + * +--+--+ + * |p2|p3| + * +--+--+ + */ +static void +twiddle_tile(const uint *tileIn, uint *tileOut) +{ + int y, x; + + for (y = 0; y < TILE_SIZE; y+=2) { + for (x = 0; x < TILE_SIZE; x+=2) { + int k = 4 * (y/2 * TILE_SIZE/2 + x/2); + tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; + tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; + tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; + tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; + } + } +} + + +/** + * Convert image from tiled layout to linear layout. 4-byte pixels. + */ +static void +untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, + uint dst_stride, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = (h + tile_size - 1) / tile_size; + const uint w_t = (w + tile_size - 1) / tile_size; + uint *tile_buf; + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + dst_stride /= 4; /* convert from bytes to pixels */ + + tile_buf = align_malloc(tile_size * tile_size * 4, 16); + + /* loop over src tiles */ + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* start of src tile: */ + const uint *tsrc = src + (it * w_t + jt) * tile_size2; + + twiddle_tile(tsrc, tile_buf); + tsrc = tile_buf; + + /* compute size of this tile (may be smaller than tile_size) */ + /* XXX note: a compiler bug was found here. That's why the code + * looks as it does. + */ + uint tile_width = w - jt * tile_size; + tile_width = MIN2(tile_width, tile_size); + uint tile_height = h - it * tile_size; + tile_height = MIN2(tile_height, tile_size); + + /* loop over texels in the tile */ + for (i = 0; i < tile_height; i++) { + for (j = 0; j < tile_width; j++) { + uint dsti = it * tile_size + i; + uint dstj = jt * tile_size + j; + ASSERT(dsti < h); + ASSERT(dstj < w); + dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; + } + } + } + } + + align_free(tile_buf); +} + + +/** + * Convert linear texture image data to tiled format for SPU usage. 
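An illustrative helper (not part of the commit) for the address mapping that twiddle_image_uint() above implements: the image is cut into TILE_SIZE x TILE_SIZE tiles stored contiguously, row-major within each tile. The extra 2x2 quad reordering done by twiddle_tile() is a separate step used on the untwiddle path.

/* Map a linear (x, y) pixel coordinate to its index in the tiled image.
 * 'w' is the image width in pixels; as in the code above, partial tiles
 * still occupy a full tile's worth of storage.
 */
static unsigned
tiled_index(unsigned x, unsigned y, unsigned w, unsigned tile_size)
{
   const unsigned w_t = (w + tile_size - 1) / tile_size;  /* tiles per row */
   const unsigned jt = x / tile_size, it = y / tile_size; /* tile coords   */
   const unsigned j  = x % tile_size, i  = y % tile_size; /* within tile   */

   return (it * w_t + jt) * tile_size * tile_size + i * tile_size + j;
}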
+ */ +static void +cell_twiddle_texture(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; + const uint bufWidth = align(texWidth, TILE_SIZE); + const uint bufHeight = align(texHeight, TILE_SIZE); + const void *map = pipe_buffer_map(screen, surface->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + const uint *src = (const uint *) ((const ubyte *) map + surface->offset); + + switch (ct->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; + int offset = bufWidth * bufHeight * 4 * surface->face; + uint *dst; + + if (!ct->tiled_buffer[level]) { + /* allocate buffer for tiled data now */ + struct pipe_winsys *ws = screen->winsys; + uint bytes = bufWidth * bufHeight * 4 * numFaces; + ct->tiled_buffer[level] = ws->buffer_create(ws, 16, + PIPE_BUFFER_USAGE_PIXEL, + bytes); + /* and map it */ + ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); + } + dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); + + twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); + } + break; + default: + printf("Cell: twiddle unsupported texture format %s\n", pf_name(ct->base.format)); + ; + } + + pipe_buffer_unmap(screen, surface->buffer); +} + + +/** + * Convert SPU tiled texture image data to linear format for app usage. + */ +static void +cell_untwiddle_texture(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; + const void *map = pipe_buffer_map(screen, surface->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + const uint *src = (const uint *) ((const ubyte *) map + surface->offset); + + switch (ct->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 
6 : 1; + int offset = surface->stride * texHeight * 4 * surface->face; + uint *dst; + + if (!ct->untiled_data[level]) { + ct->untiled_data[level] = + align_malloc(surface->stride * texHeight * 4 * numFaces, 16); + } + + dst = (uint *) ((ubyte *) ct->untiled_data[level] + offset); + + untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); + } + break; + default: + { + ct->untiled_data[level] = NULL; + printf("Cell: untwiddle unsupported texture format %s\n", pf_name(ct->base.format)); + } + } + + pipe_buffer_unmap(screen, surface->buffer); +} + + +static struct pipe_surface * +cell_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) +{ + struct pipe_winsys *ws = screen->winsys; + struct cell_texture *ct = cell_texture(pt); + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + winsys_buffer_reference(ws, &ps->buffer, ct->buffer); + ps->format = pt->format; + ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = ct->stride[level]; + ps->offset = ct->level_offset[level]; + ps->usage = usage; + + /* XXX may need to override usage flags (see sp_texture.c) */ + + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + ps->nblocksy * + ps->stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + if (ps->usage & PIPE_BUFFER_USAGE_CPU_READ) { + /* convert from tiled to linear layout */ + cell_untwiddle_texture(screen, ps); + } + } + return ps; +} + + +static void +cell_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + struct cell_texture *ct = cell_texture((*s)->texture); + const uint level = (*s)->level; + + if (((*s)->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level])) + { + align_free(ct->untiled_data[level]); + ct->untiled_data[level] = NULL; + } + + /* XXX if done rendering to teximage, re-tile */ + + pipe_texture_reference(&(*s)->texture, NULL); + + screen->winsys->surface_release(screen->winsys, s); +} + + +static void * +cell_surface_map(struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags) +{ + ubyte *map; + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + + assert(ct); + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = pipe_buffer_map( screen, surface->buffer, flags ); + if (map == NULL) + return NULL; + else + { + if ((surface->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level])) { + return (void *) ((ubyte *) ct->untiled_data[level] + surface->offset); + } + else { + return (void *) (map + surface->offset); + } + } +} + + +static void +cell_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + struct cell_texture *ct = cell_texture(surface->texture); + + assert(ct); + + if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) && + (surface->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) { + /* convert from linear to tiled layout */ + cell_twiddle_texture(screen, surface); + } + + pipe_buffer_unmap( screen, surface->buffer ); +} + + + +void +cell_init_screen_texture_funcs(struct pipe_screen *screen) +{ + 
screen->texture_create = cell_texture_create; + screen->texture_release = cell_texture_release; + + screen->get_tex_surface = cell_get_tex_surface; + screen->tex_surface_release = cell_tex_surface_release; + + screen->surface_map = cell_surface_map; + screen->surface_unmap = cell_surface_unmap; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h new file mode 100644 index 0000000000..7018b0c9bf --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_TEXTURE_H +#define CELL_TEXTURE_H + + +struct cell_context; +struct pipe_texture; + + +/** + * Subclass of pipe_texture + */ +struct cell_texture +{ + struct pipe_texture base; + + unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; + unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; + + /** Texture data in tiled layout is held here */ + struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; + /** Mapped, tiled texture data */ + void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; + void *untiled_data[CELL_MAX_TEXTURE_LEVELS]; +}; + + +/** cast wrapper */ +static INLINE struct cell_texture * +cell_texture(struct pipe_texture *pt) +{ + return (struct cell_texture *) pt; +} + + + +extern void +cell_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c new file mode 100644 index 0000000000..ab54e79689 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -0,0 +1,309 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Vertex buffer code. The draw module transforms vertices to window + * coords, etc. and emits the vertices into buffer supplied by this module. + * When a vertex buffer is full, or we flush, we'll send the vertex data + * to the SPUs. + * + * Authors + * Brian Paul + */ + + +#include "cell_batch.h" +#include "cell_context.h" +#include "cell_fence.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_vbuf.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + + +/** Allow vertex data to be inlined after RENDER command */ +#define ALLOW_INLINE_VERTS 1 + + +/** + * Subclass of vbuf_render because we need a cell_context pointer in + * a few places. + */ +struct cell_vbuf_render +{ + struct vbuf_render base; + struct cell_context *cell; + uint prim; /**< PIPE_PRIM_x */ + uint vertex_size; /**< in bytes */ + void *vertex_buffer; /**< just for debug, really */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +/** cast wrapper */ +static struct cell_vbuf_render * +cell_vbuf_render(struct vbuf_render *vbr) +{ + return (struct cell_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +cell_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + return &cvbr->cell->vertex_info; +} + + +static void * +cell_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ + + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /* + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + */ + + /* Make sure texture buffers aren't released until we're done rendering + * with them. 
+ */ + cell_add_fenced_textures(cell); + + /* Tell SPUs they can release the vert buf */ + if (cvbr->vertex_buf != ~0U) { + STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); + release->opcode[0] = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(cell, 0x0); + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + +static boolean +cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->prim = prim; + /*printf("cell_set_prim %u\n", prim);*/ + return TRUE; +} + + +static void +cell_vbuf_draw(struct vbuf_render *vbr, + const ushort *indices, + uint nr_indices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + float xmin, ymin, xmax, ymax; + uint i; + uint nr_vertices = 0, min_index = ~0; + const void *vertices = cvbr->vertex_buffer; + const uint vertex_size = cvbr->vertex_size; + + for (i = 0; i < nr_indices; i++) { + if (indices[i] > nr_vertices) + nr_vertices = indices[i]; + if (indices[i] < min_index) + min_index = indices[i]; + } + nr_vertices++; + +#if 0 + /*if (min_index > 0)*/ + printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", + nr_indices, nr_vertices); + printf(" "); + for (i = 0; i < nr_indices; i += 3) { + printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); + } + printf("\n"); +#elif 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", + nr_indices, nr_vertices, + indices[0], indices[1], indices[2]); + printf("ind space = %u, vert space = %u, space = %u\n", + nr_indices * 2, + nr_vertices * 4 * cell->vertex_info.size, + cell_batch_free_space(cell)); +#endif + + /* compute x/y bounding box */ + xmin = ymin = 1e50; + xmax = ymax = -1e50; + for (i = min_index; i < nr_vertices; i++) { + const float *v = (float *) ((ubyte *) vertices + i * vertex_size); + if (v[0] < xmin) + xmin = v[0]; + if (v[0] > xmax) + xmax = v[0]; + if (v[1] < ymin) + ymin = v[1]; + if (v[1] > ymax) + ymax = v[1]; + } +#if 0 + printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); + fflush(stdout); +#endif + + if (cvbr->prim != PIPE_PRIM_TRIANGLES) + return; /* only render tris for now */ + + /* build/insert batch RENDER command */ + { + const uint index_bytes = ROUNDUP16(nr_indices * 2); + const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); + STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; + + struct cell_command_render *render + = (struct cell_command_render *) + cell_batch_alloc16(cell, batch_size); + + render->opcode[0] = CELL_CMD_RENDER; + render->prim_type = cvbr->prim; + + render->num_indexes = nr_indices; + render->min_index = min_index; + + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. 
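A worked example (hypothetical numbers, not from the commit) of the batch sizing above and the inlining decision made just below:

/* Say nr_indices = 36, nr_vertices = 24 and vertex_info.size = 8 dwords
 * (32 bytes per vertex):
 *
 *   index_bytes  = ROUNDUP16(36 * 2)     = 80
 *   vertex_bytes = ROUNDUP16(24 * 4 * 8) = 768
 *   batch_size   = sizeof(struct cell_command_render) + 80
 *
 * The vertex data is copied inline after the indices only if min_index is 0
 * and 768 + 16 bytes still fit in the current batch buffer; otherwise the
 * index of the separate vertex buffer is sent instead.
 */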
+ */ + render->vertex_size = 4 * cell->vertex_info.size; + render->num_verts = nr_vertices; + if (ALLOW_INLINE_VERTS && + min_index == 0 && + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc16(cell, vertex_bytes); + memcpy(dst, vertices, vertex_bytes); + render->inline_verts = TRUE; + render->vertex_buf = ~0; + } + else { + /* vertex data in separate buffer */ + render->inline_verts = FALSE; + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; + } + + render->xmin = xmin; + render->ymin = ymin; + render->xmax = xmax; + render->ymax = ymax; + } + +#if 0 + /* helpful for debug */ + cell_flush_int(cell, CELL_FLUSH_WAIT); +#endif +} + + +static void +cell_vbuf_destroy(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->cell->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +cell_init_vbuf(struct cell_context *cell) +{ + assert(cell->draw); + + cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); + + /* The max number of indexes is what can fix into a batch buffer, + * minus the render and release-verts commands. + */ + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; + + cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; + cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; + cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; + cell->vbuf_render->base.draw = cell_vbuf_draw; + cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; + cell->vbuf_render->base.destroy = cell_vbuf_destroy; + + cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif + + cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); +} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h new file mode 100644 index 0000000000..d265cbf770 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_VBUF_H +#define CELL_VBUF_H + + +struct cell_context; + +extern void +cell_init_vbuf(struct cell_context *cell); + + +#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c new file mode 100644 index 0000000000..9cba537d9e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -0,0 +1,346 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" + +#include "cell_context.h" +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Emit a 4x4 matrix transpose operation + * + * \param p Function that the transpose operation is to be appended to + * \param row0 Register containing row 0 of the source matrix + * \param row1 Register containing row 1 of the source matrix + * \param row2 Register containing row 2 of the source matrix + * \param row3 Register containing row 3 of the source matrix + * \param dest_ptr Register containing the address of the destination matrix + * \param shuf_ptr Register containing the address of the shuffled data + * \param count Number of colums to actually be written to the destination + * + * \note + * This function assumes that the registers named by \c row0, \c row1, + * \c row2, and \c row3 are scratch and can be modified by the generated code. + * Furthermore, these registers will be released, via calls to + * \c release_register, by this function. + * + * \note + * This function requires that four temporary are available on entry. 
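For reference (not generated or driver code), a plain C version of the transpose that emit_matrix_transpose() assembles from SPE shuffle instructions; as in the generated code, only the first 'count' result columns are stored.

/* Transpose a 4x4 float matrix given as four row vectors, writing only the
 * first 'count' columns of the result (row j of 'dest' is column j of 'row').
 */
static void
transpose_4x4_ref(float dest[4][4], const float row[4][4], unsigned count)
{
   unsigned i, j;

   for (j = 0; j < count; j++)
      for (i = 0; i < 4; i++)
         dest[j][i] = row[i][j];
}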
+ */ +static void +emit_matrix_transpose(struct spe_function *p, + unsigned row0, unsigned row1, unsigned row2, + unsigned row3, unsigned dest_ptr, + unsigned shuf_ptr, unsigned count) +{ + int shuf_hi = spe_allocate_available_register(p); + int shuf_lo = spe_allocate_available_register(p); + int t1 = spe_allocate_available_register(p); + int t2 = spe_allocate_available_register(p); + int t3; + int t4; + int col0; + int col1; + int col2; + int col3; + + + spe_lqd(p, shuf_hi, shuf_ptr, 3*16); + spe_lqd(p, shuf_lo, shuf_ptr, 4*16); + spe_shufb(p, t1, row0, row2, shuf_hi); + spe_shufb(p, t2, row0, row2, shuf_lo); + + + /* row0 and row2 are now no longer needed. Re-use those registers as + * temporaries. + */ + t3 = row0; + t4 = row2; + + spe_shufb(p, t3, row1, row3, shuf_hi); + spe_shufb(p, t4, row1, row3, shuf_lo); + + + /* row1 and row3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col0 = row1; + col1 = row3; + + spe_shufb(p, col0, t1, t3, shuf_hi); + if (count > 1) { + spe_shufb(p, col1, t1, t3, shuf_lo); + } + + /* t1 and t3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col2 = t1; + col3 = t3; + + if (count > 2) { + spe_shufb(p, col2, t2, t4, shuf_hi); + } + + if (count > 3) { + spe_shufb(p, col3, t2, t4, shuf_lo); + } + + + /* Store the results. Remember that the stqd instruction is encoded using + * the qword offset (stand-alone assemblers to the byte-offset to + * qword-offset conversion for you), so the byte-offset needs be divided by + * 16. + */ + switch (count) { + case 4: + spe_stqd(p, col3, dest_ptr, 3 * 16); + case 3: + spe_stqd(p, col2, dest_ptr, 2 * 16); + case 2: + spe_stqd(p, col1, dest_ptr, 1 * 16); + case 1: + spe_stqd(p, col0, dest_ptr, 0 * 16); + } + + + /* Release all of the temporary registers used. 
+ */ + spe_release_register(p, col0); + spe_release_register(p, col1); + spe_release_register(p, col2); + spe_release_register(p, col3); + spe_release_register(p, shuf_hi); + spe_release_register(p, shuf_lo); + spe_release_register(p, t2); + spe_release_register(p, t4); +} + + +#if 0 +/* This appears to not be used currently */ +static void +emit_fetch(struct spe_function *p, + unsigned in_ptr, unsigned *offset, + unsigned out_ptr, unsigned shuf_ptr, + enum pipe_format format) +{ + const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); + const unsigned type = pf_type(format); + const unsigned bytes = pf_size_x(format); + + int v0 = spe_allocate_available_register(p); + int v1 = spe_allocate_available_register(p); + int v2 = spe_allocate_available_register(p); + int v3 = spe_allocate_available_register(p); + int tmp = spe_allocate_available_register(p); + int float_zero = -1; + int float_one = -1; + float scale_signed = 0.0; + float scale_unsigned = 0.0; + + spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); + spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); + spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); + spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); + offset[0] += 4; + + switch (bytes) { + case 1: + scale_signed = 1.0f / 127.0f; + scale_unsigned = 1.0f / 255.0f; + spe_lqd(p, tmp, shuf_ptr, 1 * 16); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 2: + scale_signed = 1.0f / 32767.0f; + scale_unsigned = 1.0f / 65535.0f; + spe_lqd(p, tmp, shuf_ptr, 2 * 16); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 4: + scale_signed = 1.0f / 2147483647.0f; + scale_unsigned = 1.0f / 4294967295.0f; + break; + default: + assert(0); + break; + } + + switch (type) { + case PIPE_FORMAT_TYPE_FLOAT: + break; + case PIPE_FORMAT_TYPE_UNORM: + spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); + spe_cuflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_SNORM: + spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); + spe_csflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_USCALED: + spe_cuflt(p, v0, v0, 0); + break; + case PIPE_FORMAT_TYPE_SSCALED: + spe_csflt(p, v0, v0, 0); + break; + } + + + if (count < 4) { + float_one = spe_allocate_available_register(p); + spe_il(p, float_one, 1); + spe_cuflt(p, float_one, float_one, 0); + + if (count < 3) { + float_zero = spe_allocate_available_register(p); + spe_il(p, float_zero, 0); + } + } + + spe_release_register(p, tmp); + + emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + + switch (count) { + case 1: + spe_stqd(p, float_zero, out_ptr, 1 * 16); + case 2: + spe_stqd(p, float_zero, out_ptr, 2 * 16); + case 3: + spe_stqd(p, float_one, out_ptr, 3 * 16); + } + + if (float_zero != -1) { + spe_release_register(p, float_zero); + } + + if (float_one != -1) { + spe_release_register(p, float_one); + } +} +#endif + + +void cell_update_vertex_fetch(struct draw_context *draw) +{ +#if 0 + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct spe_function *p = &cell->attrib_fetch; + unsigned function_index[PIPE_MAX_ATTRIBS]; + unsigned unique_attr_formats; + int out_ptr; + int in_ptr; + int 
shuf_ptr; + unsigned i; + unsigned j; + + + /* Determine how many unique input attribute formats there are. At the + * same time, store the index of the lowest numbered attribute that has + * the same format as any non-unique format. + */ + unique_attr_formats = 1; + function_index[0] = 0; + for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; + + for (j = 0; j < i; j++) { + if (curr_fmt == draw->vertex_element[j].src_format) { + break; + } + } + + if (j == i) { + unique_attr_formats++; + } + + function_index[i] = j; + } + + + /* Each fetch function can be a maximum of 34 instructions (note: this is + * actually a slight over-estimate). + */ + spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); + + + /* Allocate registers for the function's input parameters. + */ + out_ptr = spe_allocate_register(p, 3); + in_ptr = spe_allocate_register(p, 4); + shuf_ptr = spe_allocate_register(p, 5); + + + /* Generate code for the individual attribute fetch functions. + */ + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + unsigned offset; + + if (function_index[i] == i) { + cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr + - (void *) p->store); + + offset = 0; + emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, + draw->vertex_element[i].src_format); + spe_bi(p, 0, 0, 0); + + /* Round up to the next 16-byte boundary. + */ + if ((((unsigned) p->store) & 0x0f) != 0) { + const unsigned align = ((unsigned) p->store) & 0x0f; + p->store = (uint32_t *) (((void *) p->store) + align); + } + } else { + /* Use the same function entry-point as a previously seen attribute + * with the same format. + */ + cell->attrib_fetch_offsets[i] = + cell->attrib_fetch_offsets[function_index[i]]; + } + } +#else + assert(0); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c new file mode 100644 index 0000000000..2b10c116fa --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,146 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. 
+ * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. + */ +void +cell_vertex_shader_queue_flush(struct draw_context *draw) +{ +#if 0 + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct cell_command_vs *const vs = &cell_global.command[0].vs; + uint64_t *batch; + struct cell_array_info *array_info; + unsigned i, j; + struct cell_attribute_fetch_code *cf; + + assert(draw->vs.queue_nr != 0); + + /* XXX: do this on statechange: + */ + draw_update_vertex_fetch(draw); + cell_update_vertex_fetch(draw); + + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); + batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; + cf = (struct cell_attribute_fetch_code *) (&batch[1]); + cf->base = (uint64_t) cell->attrib_fetch.store; + cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr + - (void *) cell->attrib_fetch.store)); + + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format format = draw->vertex_element[i].src_format; + const unsigned count = ((pf_size_x(format) != 0) + + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + + (pf_size_w(format) != 0)); + const unsigned size = pf_size_x(format) * count; + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + + batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; + + array_info = (struct cell_array_info *) &batch[1]; + assert(draw->vertex_fetch.src_ptr[i] != NULL); + array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; + array_info->attr = i; + array_info->pitch = draw->vertex_fetch.pitch[i]; + array_info->size = size; + array_info->function_offset = cell->attrib_fetch_offsets[i]; + } + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + + sizeof(struct pipe_viewport_state)); + batch[0] = CELL_CMD_STATE_VIEWPORT; + (void) memcpy(&batch[1], &draw->viewport, + sizeof(struct pipe_viewport_state)); + + { + uint64_t uniforms = (uintptr_t) draw->user.constants; + + batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); + batch[0] = CELL_CMD_STATE_UNIFORMS; + batch[1] = uniforms; + } + + cell_batch_flush(cell); + + vs->opcode = CELL_CMD_VS_EXECUTE; + vs->nr_attrs = draw->vertex_fetch.nr_attrs; + + (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); + vs->nr_planes = draw->nr_planes; + + for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { + const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + vs->elts[j] = draw->vs.queue[i + j].elt; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; + } + + for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { + vs->elts[j] = vs->elts[0]; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; + } + + vs->num_elts = n; + send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); + + cell_flush_int(cell, CELL_FLUSH_WAIT); + } + + draw->vs.post_nr = draw->vs.queue_nr; + draw->vs.queue_nr = 0; +#else + assert(0); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c new file mode 100644 index 0000000000..d570bbd2f9 --- /dev/null 
+++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "cell_winsys.h" + + +struct cell_winsys * +cell_get_winsys(uint format) +{ + struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys); + if (cws) + cws->preferredFormat = format; + return cws; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h new file mode 100644 index 0000000000..ae2af5696b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_WINSYS_H +#define CELL_WINSYS_H + +#include "pipe/p_compiler.h" + + +/** + * Very simple winsys at this time. + * Will probably eventually add SPU control info. 
+ */ +struct cell_winsys +{ + uint preferredFormat; +}; + + +extern struct cell_winsys * +cell_get_winsys(uint format); + + + +#endif diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore new file mode 100644 index 0000000000..2be9a2d324 --- /dev/null +++ b/src/gallium/drivers/cell/spu/.gitignore @@ -0,0 +1 @@ +g3d_spu diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile new file mode 100644 index 0000000000..116453b79c --- /dev/null +++ b/src/gallium/drivers/cell/spu/Makefile @@ -0,0 +1,82 @@ +# Gallium3D Cell driver: SPU code + +# This makefile builds the g3d_spu.a file that's linked into the +# PPU code/library. + + +TOP = ../../../../.. +include $(TOP)/configs/current + + +PROG = g3d + +PROG_SPU = $(PROG)_spu +PROG_SPU_A = $(PROG)_spu.a +PROG_SPU_EMBED_O = $(PROG)_spu-embed.o + + +SOURCES = \ + spu_command.c \ + spu_dcache.c \ + spu_funcs.c \ + spu_main.c \ + spu_per_fragment_op.c \ + spu_render.c \ + spu_texture.c \ + spu_tile.c \ + spu_tri.c + +OLD_SOURCES = \ + spu_exec.c \ + spu_util.c \ + spu_vertex_fetch.c \ + spu_vertex_shader.c + + +SPU_OBJECTS = $(SOURCES:.c=.o) \ + +SPU_ASM_OUT = $(SOURCES:.c=.s) \ + +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers + + +.c.o: + $(SPU_CC) $(SPU_CFLAGS) -c $< + +.c.s: + $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< + + +# The .a file will be linked into the main/PPU executable +default: $(PROG_SPU_A) + +$(PROG_SPU_A): $(PROG_SPU_EMBED_O) + $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) + +$(PROG_SPU_EMBED_O): $(PROG_SPU) + $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) + +$(PROG_SPU): $(SPU_OBJECTS) + $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) + + + +asmfiles: $(SPU_ASM_OUT) + + +clean: + rm -f *~ *.o *.a *.d *.s $(PROG_SPU) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h new file mode 100644 index 0000000000..d7ce005524 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -0,0 +1,145 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
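For orientation (an assumption about the usual libspe2 embedding convention, not code from this commit): the embedspu step in the SPU Makefile above turns the linked g3d_spu executable into an object exporting an spe_program_handle_t named g3d_spu, which PPU code could then load onto an SPE roughly like this.

#include <libspe2.h>

extern spe_program_handle_t g3d_spu;   /* symbol created by the embed step */

/* Hypothetical sketch: run the embedded SPU program on one SPE context. */
static void
run_one_spu(void *init_data)
{
   spe_context_ptr_t ctx = spe_context_create(0, NULL);
   unsigned int entry = SPE_DEFAULT_ENTRY;

   spe_program_load(ctx, &g3d_spu);
   spe_context_run(ctx, &entry, 0, init_data, NULL, NULL);
   spe_context_destroy(ctx);
}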
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include <transpose_matrix4x4.h> +#include <spu_intrinsics.h> + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + + out = spu_shuffle(out, out, ((vector unsigned char) { + 0, 4, 8, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }) ); + + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, shuffle); + return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_B8G8R8A8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 3, 3, 3, 3}) ); + return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, + 0, 0, 0, 0}) ); + return spu_convtf(color_u4, 32); +} + + +/** + * \param color_in - array of 32-bit packed ARGB colors + * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order + */ +static INLINE void +spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], + vector float color_out[4]) +{ + vector unsigned int c0; + + c0 = spu_shuffle(color_in[0], color_in[0], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[0] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[1], color_in[1], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[1] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[2], color_in[2], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[2] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[3], color_in[3], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[3] = spu_convtf(c0, 32); + + _transpose_matrix4x4(color_out, color_out); +} + + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c new file mode 100644 index 0000000000..5c0179d954 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -0,0 +1,815 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU command processing code + */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_command.h" +#include "spu_main.h" +#include "spu_render.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + + +struct spu_vs_context draw; + + +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] + ALIGN16_ATTRIB; + + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE}; + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. + * There's a qword of status per SPU. 
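+ * The PPU waits for fence completion by polling those status words; an
+ * illustrative (not the actual PPU-side) wait loop:
+ *
+ *    uint i;
+ *    for (i = 0; i < num_spus; i++)
+ *       while (fence->status[i][0] != CELL_FENCE_SIGNALLED)
+ *          ;  /* spin */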
+ */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ + static const vector unsigned int status = {CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED}; + uint *dst = (uint *) fence_cmd->fence; + dst += 4 * spu.init.id; /* main store/memory address, not local store */ + ASSERT_ALIGN16(dst); + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_FENCE, /* tag */ + 0, /* tid */ + 0 /* rid */); +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } + } + else { + spu.fb.depth_clear_value = clear->value; + } + +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. + */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + } + +#else + + /* + * This path clears the whole framebuffer to the clear color right now. + */ + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); + } + + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ + + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); + + /* Copy state info (for fallback case only - this will eventually + * go away when the fallback case goes away) + */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); + + /* Make sure the SPU knows which buffers it's expected to read when + * it's told to pull tiles. 
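+ * For example, enabling either depth testing or stenciling (or both)
+ * sets read_depth_stencil below, so the Z/stencil tile gets pulled in
+ * before per-fragment ops run; with both disabled that tile read is
+ * skipped.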
+ */ + spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); + + /* If we're forcing the fallback code to be used (for debug purposes), + * install that. Otherwise install the incoming SPU code. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { + static unsigned int warned = 0; + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + /* The following two lines aren't really necessary if you + * know the debug flags won't change during a run, and if you + * know that the function pointers are initialized correctly. + * We set them here to allow a person to change the debug + * flags during a run (from inside a debugger). + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; + } + + /* Make sure the SPU code buffer is large enough to hold the incoming code. + * Note that we *don't* use align_malloc() and align_free(), because + * those utility functions are *not* available in SPU code. + * */ + if (spu.fragment_ops_code_size < fops->total_code_size) { + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu.fragment_ops_code_size = fops->total_code_size; + spu.fragment_ops_code = malloc(fops->total_code_size); + if (spu.fragment_ops_code == NULL) { + /* Whoops. */ + fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; + } + } + + /* Copy the SPU code from the command buffer to the spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); + + /* Set the pointers for the front-facing and back-facing fragments + * to the specified offsets within the code. Note that if the + * front-facing and back-facing code are the same, they'll have + * the same offset. 
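+ * (In that case fragment_ops[CELL_FACING_FRONT] and
+ * fragment_ops[CELL_FACING_BACK] simply end up pointing at the same
+ * generated routine.)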
+ */ + spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; + spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; +} + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static uint +cmd_state_fs_constants(const qword *buffer, uint pos) +{ + const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0); + const float *constants = (const float *) &buffer[pos+2]; + uint i; + + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + + /* Expand each float to float[4] for SOA execution */ + for (i = 0; i < num_const; i++) { + D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); + spu.constants[i] = spu_splats(constants[i]); + } + + /* return new buffer pos (in 16-byte words) */ + return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16); +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; + break; + case PIPE_FORMAT_Z16_UNORM: + spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; + break; + default: + spu.fb.zsize = 0; + break; + } +} + + +/** + * Tex texture mask_s/t and scale_s/t fields depend on the texture size and + * sampler wrap modes. 
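+ *
+ * For example, a 128x64 level with PIPE_TEX_WRAP_REPEAT gets
+ * mask_s = 127 and mask_t = 63, so coordinates wrap with a simple AND
+ * (this relies on power-of-two level sizes); any other wrap mode gets
+ * a mask of ~0, which leaves the coordinate bits untouched.  Likewise
+ * scale_s/scale_t are the level's width/height for normalized
+ * coordinates and 1.0 otherwise.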
+ */ +static void +update_tex_masks(struct spu_texture *texture, + const struct pipe_sampler_state *sampler) +{ + uint i; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + int width = texture->level[i].width; + int height = texture->level[i].height; + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_s = spu_splats(width - 1); + else + texture->level[i].mask_s = spu_splats(~0); + + if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_t = spu_splats(height - 1); + else + texture->level[i].mask_t = spu_splats(~0); + + if (sampler->normalized_coords) { + texture->level[i].scale_s = spu_splats((float) width); + texture->level[i].scale_t = spu_splats((float) height); + } + else { + texture->level[i].scale_s = spu_splats(1.0f); + texture->level[i].scale_t = spu_splats(1.0f); + } + } +} + + +static void +cmd_state_sampler(const struct cell_command_sampler *sampler) +{ + uint unit = sampler->unit; + + D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); + + spu.sampler[unit] = sampler->state; + + switch (spu.sampler[unit].min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; + break; + default: + ASSERT(0); + } + + switch (spu.sampler[sampler->unit].mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; + break; + default: + ASSERT(0); + } + + switch (spu.sampler[sampler->unit].min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + case PIPE_TEX_MIPFILTER_LINEAR: + spu.sample_texture_2d[unit] = sample_texture_2d_lod; + break; + case PIPE_TEX_MIPFILTER_NONE: + spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; + break; + default: + ASSERT(0); + } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + const uint unit = texture->unit; + uint i; + + D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); + + spu.texture[unit].max_level = 0; + spu.texture[unit].target = texture->target; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + uint width = texture->width[i]; + uint height = texture->height[i]; + uint depth = texture->depth[i]; + + D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, + texture->start[i], texture->width[i], texture->height[i]); + + spu.texture[unit].level[i].start = texture->start[i]; + spu.texture[unit].level[i].width = width; + spu.texture[unit].level[i].height = height; + spu.texture[unit].level[i].depth = depth; + + spu.texture[unit].level[i].tiles_per_row = + (width + TILE_SIZE - 1) / TILE_SIZE; + + spu.texture[unit].level[i].bytes_per_image = + 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; + + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); + spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); + + if (texture->start[i]) + spu.texture[unit].max_level = i; + } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + ASSERT(vinfo->num_attribs >= 1); + 
ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_MAX_ATTRIBS); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void +cmd_finish(void) +{ + D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); + uint pos; + + D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); + release_buffer(buf); + + /* + * Loop over commands in the batch buffer + */ + for (pos = 0; pos < usize; /* no incr */) { + switch (si_to_uint(buffer[pos])) { + /* + * rendering commands + */ + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 16; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return + } + break; + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 16; + } + break; + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + /* This is a variant-sized command */ + pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16; + } + break; + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 16; + } + break; + case 
CELL_CMD_STATE_FS_CONSTANTS: + pos = cmd_state_fs_constants(buffer, pos); + break; + case CELL_CMD_STATE_RASTERIZER: + { + struct cell_command_rasterizer *rast = + (struct cell_command_rasterizer *) &buffer[pos]; + spu.rasterizer = rast->rasterizer; + pos += sizeof(*rast) / 16; + } + break; + case CELL_CMD_STATE_SAMPLER: + { + struct cell_command_sampler *sampler + = (struct cell_command_sampler *) &buffer[pos]; + cmd_state_sampler(sampler); + pos += sizeof(*sampler) / 16; + } + break; + case CELL_CMD_STATE_TEXTURE: + { + struct cell_command_texture *texture + = (struct cell_command_texture *) &buffer[pos]; + cmd_state_texture(texture); + pos += sizeof(*texture) / 16; + } + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16; + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16; + break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0); + pos += 2; + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16; + break; + case CELL_CMD_STATE_BIND_VS: +#if 0 + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); +#endif + pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16; + break; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); + pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16; + break; + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_FENCE: + { + struct cell_command_fence *fence_cmd = + (struct cell_command_fence *) &buffer[pos]; + cmd_fence(fence_cmd); + pos += sizeof(*fence_cmd) / 16; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 16; + } + break; + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16; + break; + } + default: + printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos])); + ASSERT(0); + break; + } + } + + D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); +} + + +#define PERF 0 + + +/** + * Main loop for SPEs: Get a command, execute it, repeat. 
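+ *
+ * The mailbox word is packed as described in cell/common.h: the low
+ * byte is the opcode and, for CELL_CMD_BATCH, bits 8..15 give the
+ * batch buffer index and bits 16..31 give its size in bytes.  For
+ * example, 0x00400205 means "execute batch buffer 2, 0x40 bytes long".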
+ */ +void +command_loop(void) +{ + int exitFlag = 0; + uint t0, t1; + + D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); + + while (!exitFlag) { + unsigned opcode; + + D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + + if (PERF) + spu_write_decrementer(~0); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + + if (PERF) + t0 = spu_read_decrementer(); + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: +#if 0 + spu_execute_vertex_shader(&draw, &cmd.vs); +#endif + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); + } + + if (PERF) { + t1 = spu_read_decrementer(); + printf("wait mbox time: %gms batch time: %gms\n", + (~0u - t0) * spu.init.inv_timebase, + (t0 - t1) * spu.init.inv_timebase); + } + } + + D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); + + if (spu.init.debug_flags & CELL_DEBUG_CACHE) + spu_dcache_report(); +} + +/* Initialize this module; we manage the fragment ops buffer here. */ +void +spu_command_init(void) +{ + /* Install default/fallback fragment processing function. + * This will normally be overriden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + + /* Set up the basic empty buffer for code-gen'ed fragment ops */ + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; +} + +void +spu_command_close(void) +{ + /* Deallocate the code-gen buffer for fragment ops, and reset the + * fragment ops functions to their initial setting (just to leave + * things in a good state). + */ + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu_command_init(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h new file mode 100644 index 0000000000..83dcdade28 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +extern void +command_loop(void); + +extern void +spu_command_init(void); + +extern void +spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c new file mode 100644 index 0000000000..a6d67634fd --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -0,0 +1,145 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_dcache.h" + +#define CACHELINE_LOG2SIZE 7 +#define LINE_SIZE (1U << 7) +#define ALIGN_MASK (~(LINE_SIZE - 1)) + +#define CACHE_NAME data +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#ifdef DEBUG +#define CACHE_STATS 1 +#endif +#include <cache-api.h> + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME data +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +/** + * Fetch between arbitrary number of bytes from an unaligned address + * + * \param dst Destination data buffer + * \param ea Main memory effective address of source data + * \param size Number of bytes to read + * + * \warning + * As is hinted by the type of the \c dst pointer, this function writes + * multiples of 16-bytes. + */ +void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + const int shift = ea & 0x0f; + const unsigned read_size = ROUNDUP16(size + shift); + const unsigned last_read = ROUNDUP16(ea + size); + const qword *const last_write = dst + (ROUNDUP16(size) / 16); + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < size; i += 16) { + *(dst++) = cache_rd(data, ea + i); + } + } else { + qword hi; + + + /* Please exercise extreme caution when modifying this code. This code + * must not read past the end of the page containing the source data, + * and it must not write more than ((size + 15) / 16) qwords to the + * destination buffer. 
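+ *
+ * Worked example: ea = 0x1003, size = 20 gives shift = 3 and
+ * read_size = 32, so the code reads the qwords at 0x1000 and 0x1010,
+ * shifts them together by 3 bytes, and writes exactly
+ * ROUNDUP16(20) / 16 = 2 qwords to dst.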
+ */ + ea &= ~0x0f; + hi = cache_rd(data, ea); + for (i = 16; i < read_size; i += 16) { + qword lo = cache_rd(data, ea + i); + + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), + (qword) spu_rlmaskqwbyte(lo, shift - 16)); + hi = lo; + } + + if (dst != last_write) { + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); + } + } + + ASSERT((ea + i) == last_read); + ASSERT(dst == last_write); +} + + +/** + * Notify the cache that a range of main memory may have been modified + */ +void +spu_dcache_mark_dirty(unsigned ea, unsigned size) +{ + unsigned i; + const unsigned aligned_start = (ea & ALIGN_MASK); + const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) + & ALIGN_MASK; + + + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + const unsigned entry = __cache_dir[i]; + const unsigned addr = entry & ~0x0f; + + __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) + ? (entry & ~CACHELINE_VALID) : entry; + } +} + + +/** + * Print cache utilization report + */ +void +spu_dcache_report(void) +{ +#ifdef CACHE_STATS + if (spu.init.id == 0) { + printf("SPU 0: Texture cache report:\n"); + cache_pr_stats(data); + } +#endif +} + + diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h new file mode 100644 index 0000000000..39a19eb31b --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.h @@ -0,0 +1,37 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SPU_DCACHE_H +#define SPU_DCACHE_H + +extern void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); + +extern void +spu_dcache_mark_dirty(unsigned ea, unsigned size); + +extern void +spu_dcache_report(void); + +#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c new file mode 100644 index 0000000000..e27df2dfb3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -0,0 +1,1933 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 
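+ *
+ * For example, if an IF leaves CondMask = 0xA (only pixels 1 and 3
+ * took the branch) while LoopMask, ContMask and FuncMask are all 0xF,
+ * UPDATE_EXEC_MASK() gives ExecMask = 0xA and store_dest() will only
+ * write results for pixels 1 and 3.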
+ * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include <transpose_matrix4x4.h> +#include <simdmath/ceilf4.h> +#include <simdmath/cosf4.h> +#include <simdmath/divf4.h> +#include <simdmath/floorf4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> +#include <simdmath/sinf4.h> +#include <simdmath/sqrtf4.h> +#include <simdmath/truncf4.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor) +{ + const qword zero = si_il(0); + const qword not_zero = si_il(~0); + + (void) numSamplers; + mach->Samplers = samplers; + mach->Processor = processor; + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + /* Setup constants. 
*/ + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ + return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ + return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ + return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +}; + +static qword +micro_ddx(qword src) +{ + qword bottom_right = si_shufb(src, src, br_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ + qword top_left = si_shufb(src, src, tl_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ + return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ + return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ + return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ + return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ + return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ + const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + + return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ + return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ + return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ + return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ + return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) 
+{ + return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ + const qword half = (qword) spu_splats(0.5f); + + /* May be able to use _roundf4. There may be some difference, though. + */ + return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ + return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ + return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ + return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ + return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( + const struct spu_exec_machine *mach, + const uint file, + const uint swizzle, + const union spu_exec_channel *index, + union spu_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: { + unsigned i; + + for (i = 0; i < 4; i++) { + const float *ptr = mach->Consts[index->i[i]]; + float tmp[4]; + + spu_dcache_fetch_unaligned((qword *) tmp, + (uintptr_t)(ptr + swizzle), + sizeof(float)); + + chan->f[i] = tmp[0]; + } + break; + } + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + ASSERT( index->i[0] < (int) mach->ImmLimit ); + ASSERT( index->i[1] < (int) mach->ImmLimit ); + ASSERT( index->i[2] < (int) mach->ImmLimit ); + ASSERT( index->i[3] < (int) mach->ImmLimit ); + + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + ASSERT( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + ASSERT( 0 ); + } +} + +static void +fetch_source( + const struct spu_exec_machine *mach, + union spu_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union spu_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel 
indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + CHAN_X); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.q = si_mpyi(index.q, 17); + break; + case TGSI_FILE_CONSTANT: + index.q = si_shli(index.q, 12); + break; + default: + ASSERT( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + chan->q = micro_abs(chan->q); + break; + + case TGSI_UTIL_SIGN_SET: + chan->q = micro_set_sign(chan->q); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + chan->q = micro_neg(chan->q); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); + } +} + +static void +store_dest( + struct spu_exec_machine *mach, + const union spu_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union spu_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + ASSERT( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + ASSERT( 0 ); + break; + + default: + ASSERT( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. 
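+ * For example, if the source's four tested values for pixel 0 are
+ * (0.5, -1.0, 0.0, 2.0), bit 0 of the kill mask is set because the
+ * second value is negative; a pixel whose tested values are all >= 0
+ * is left alone.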
+ */ +static void +exec_kil(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union spu_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* TODO: build kilmask from CC mask */ + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct spu_sampler *sampler, + const union spu_exec_channel *s, + const union spu_exec_channel *t, + const union spu_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union spu_exec_channel *r, + union spu_exec_channel *g, + union spu_exec_channel *b, + union spu_exec_channel *a ) +{ + qword rgba[4]; + qword out[4]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, + (float (*)[4]) rgba); + + _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); + r->q = out[0]; + g->q = out[1]; + b->q = out[2]; + a->q = out[3]; +} + + +static void +exec_tex(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod, boolean projected) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union spu_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + if (projected) { + FETCH(&r[1], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[1].q); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); 
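+         /* projective texturing: divide S, T and R by the Q fetched above */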
+ r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + ASSERT (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + + +static void +constant_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + interpolation_func interp; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + ASSERT( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union spu_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + 
r[0].q = si_cflts(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + + FETCH( &r[2], 0, CHAN_W ); + r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); + r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); + r[1].q = micro_pow(r[1].q, r[2].q); + + /* r0 = (r0 > 0.0) ? r1 : 0.0 + */ + r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, + r[0].q); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sqrt(r[0].q); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + ASSERT (0); + break; + + case TGSI_OPCODE_LOG: + ASSERT (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fm(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fa(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X 
); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + r[0].q = si_fm(r[0].q, r[1].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_min(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_max(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = micro_ge(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ge(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + FETCH( &r[2], 2, chan_index ); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fs(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + r[1].q = si_fs(r[1].q, r[2].q); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + ASSERT (0); + break; + + case TGSI_OPCODE_CND0: + ASSERT (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + ASSERT (0); + break; + + case TGSI_OPCODE_INDEX: + ASSERT (0); + break; + + case TGSI_OPCODE_NEGATE: + ASSERT (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_frc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + ASSERT (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_flr(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_rnd(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_lg2(r[0].q); + FOR_EACH_ENABLED_CHANNEL( 
*inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = micro_pow(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + /* r2 = (r0 * r1) - (r3 * r5) + */ + r[2].q = si_fm(r[3].q, r[5].q); + r[2].q = si_fms(r[0].q, r[1].q, r[2].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + FETCH(&r[5], 0, CHAN_X); + + /* r3 = (r3 * r2) - (r1 * r5) + */ + r[1].q = si_fm(r[1].q, r[5].q); + r[3].q = si_fms(r[3].q, r[2].q, r[1].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + /* r5 = (r5 * r4) - (r0 * r2) + */ + r[0].q = si_fm(r[0].q, r[2].q); + r[5].q = si_fms(r[5].q, r[4].q, r[0].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + ASSERT (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + r[0].q = micro_abs(r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + ASSERT (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 1, CHAN_W); + + r[0].q = si_fa(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_cos(r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddx(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddy(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + exec_kil (mach, inst); + break; + + case TGSI_OPCODE_PK2H: + ASSERT (0); + break; + + case TGSI_OPCODE_PK2US: + ASSERT (0); + break; + + case TGSI_OPCODE_PK4B: + ASSERT (0); + break; + + case TGSI_OPCODE_PK4UB: + ASSERT (0); + break; + + case TGSI_OPCODE_RFL: + ASSERT (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + ASSERT (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fcgt(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sin(r[0].q); + 
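/* scalar op: the single sin(src.x) result is written to every enabled channel */
+ 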
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fcgt(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + ASSERT (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + ASSERT (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, TRUE); + break; + + case TGSI_OPCODE_UP2H: + ASSERT (0); + break; + + case TGSI_OPCODE_UP2US: + ASSERT (0); + break; + + case TGSI_OPCODE_UP4B: + ASSERT (0); + break; + + case TGSI_OPCODE_UP4UB: + ASSERT (0); + break; + + case TGSI_OPCODE_X2D: + ASSERT (0); + break; + + case TGSI_OPCODE_ARA: + ASSERT (0); + break; + + case TGSI_OPCODE_ARR: + ASSERT (0); + break; + + case TGSI_OPCODE_BRA: + ASSERT (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + ASSERT(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + ASSERT(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + ASSERT(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + ASSERT(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + 
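/* re-derive ExecMask from the restored Cond/Loop/Cont/Func masks */
+ 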
UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + ASSERT (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + /* r0 = (r0 < 0.0) ? r1 : r2 + */ + r[3].q = si_xor(r[3].q, r[3].q); + r[0].q = micro_lt(r[0].q, r[3].q); + r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + r[1].q = micro_cos(r[0].q); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + r[1].q = micro_sin(r[0].q); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + ASSERT (0); + break; + + case TGSI_OPCODE_DIV: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! 
r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + ASSERT(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + ASSERT(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + ASSERT (0); + break; + + case TGSI_OPCODE_ENDREP: + ASSERT (0); + break; + + case TGSI_OPCODE_PUSHA: + ASSERT (0); + break; + + case TGSI_OPCODE_POPA: + ASSERT (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ceil(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_csflt(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_xorbi(r[0].q, 0xff); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_trunc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_shl(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ishr(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_and(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_or(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + ASSERT (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_xor(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + ASSERT (0); + break; + + case TGSI_OPCODE_TXF: + ASSERT (0); + break; + + case TGSI_OPCODE_TXQ: + ASSERT (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + 
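/* the continue mask is pushed alongside; ENDLOOP pops both when the loop exits */
+ 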
ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + ASSERT(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + ASSERT(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + ASSERT(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + ASSERT( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ + ASSERT(mach->CondStackTop == 0); + ASSERT(mach->LoopStackTop == 0); + ASSERT(mach->ContStackTop == 0); + ASSERT(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + for (i = 0; i < mach->NumDeclarations; i++) { + union { + struct tgsi_full_declaration decl; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; + } d ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Declarations + pc); + + spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); + + exec_declaration( mach, &d.decl ); + } + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + union { + struct tgsi_full_instruction inst; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; + } i ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Instructions + pc); + + spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); + exec_instruction( mach, & i.inst, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. 
+ */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h new file mode 100644 index 0000000000..8605679940 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "tgsi/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union spu_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; + qword q; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct spu_exec_vector +{ + union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct spu_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. 
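+ * This is the SPU-side counterpart of tgsi_exec_machine (tgsi/tgsi_exec.h);
+ * register channels are unions that can be accessed as SPU qwords.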
+ */ +struct spu_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + */ + struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] + ALIGN16_ATTRIB; + + struct spu_exec_vector *Addrs; + + struct spu_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct spu_exec_vector *Inputs; + struct spu_exec_vector *Outputs; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct spu_interp_coef *InterpCoefs; + struct spu_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c new file mode 100644 index 0000000000..ff3d609d25 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -0,0 +1,173 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * SPU functions accessed by shaders. + * + * Authors: Brian Paul + */ + + +#include <string.h> +#include <libmisc.h> +#include <math.h> +#include <cos14_v.h> +#include <sin14_v.h> +#include <simdmath/exp2f4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_funcs.h" +#include "spu_texture.h" + + +/** For "return"-ing four vectors */ +struct vec_4x4 +{ + vector float v[4]; +}; + + +static vector float +spu_cos(vector float x) +{ + return _cos14_v(x); +} + +static vector float +spu_sin(vector float x) +{ + return _sin14_v(x); +} + +static vector float +spu_pow(vector float x, vector float y) +{ + return _powf4(x, y); +} + +static vector float +spu_exp2(vector float x) +{ + return _exp2f4(x); +} + +static vector float +spu_log2(vector float x) +{ + return _log2f4(x); +} + + +static struct vec_4x4 +spu_tex_2d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_3d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_cube(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) q; + sample_texture_cube(s, t, r, unit, colors.v); + return colors; +} + + +/** + * Add named function to list of "exported" functions that will be + * made available to the PPU-hosted code generator. + */ +static void +export_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) +{ + uint n = spu_functions->num; + ASSERT(strlen(name) < 16); + strcpy(spu_functions->names[n], name); + spu_functions->addrs[n] = (uint) addr; + spu_functions->num++; + ASSERT(spu_functions->num <= 16); +} + + +/** + * Return info about the SPU's function to the PPU / main memory. + * The PPU needs to know the address of some SPU-side functions so + * that we can generate shader code with function calls. 
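+ * The name/address table is DMA'd back to spu.init.spu_functions in main memory.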
+ */ +void +return_function_info(void) +{ + struct cell_spu_function_info funcs ALIGN16_ATTRIB; + int tag = TAG_MISC; + + ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ + + funcs.num = 0; + export_func(&funcs, "spu_cos", &spu_cos); + export_func(&funcs, "spu_sin", &spu_sin); + export_func(&funcs, "spu_pow", &spu_pow); + export_func(&funcs, "spu_exp2", &spu_exp2); + export_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_tex_2d", &spu_tex_2d); + export_func(&funcs, "spu_tex_3d", &spu_tex_3d); + export_func(&funcs, "spu_tex_cube", &spu_tex_cube); + + /* Send the function info back to the PPU / main memory */ + mfc_put((void *) &funcs, /* src in local store */ + (unsigned int) spu.init.spu_functions, /* dst in main memory */ + sizeof(funcs), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << tag); +} + + + diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h new file mode 100644 index 0000000000..3adb6ae99f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_FUNCS_H +#define SPU_FUNCS_H + +extern void +return_function_info(void); + +#endif + diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c new file mode 100644 index 0000000000..97c86d194d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* main() for Cell SPU code */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_funcs.h" +#include "spu_command.h" +#include "spu_main.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +//#include "spu_test.h" +#include "cell/common.h" + + +/* +helpful headers: +/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h +/opt/cell/sdk/usr/include/libmisc.h +*/ + +struct spu_global spu; + + +static void +one_time_init(void) +{ + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); +} + +/* In some versions of the SDK the SPE main takes 'unsigned long' as a + * parameter. In others it takes 'unsigned long long'. Use a define to + * select between the two. + */ +#ifdef SPU_MAIN_PARAM_LONG_LONG +typedef unsigned long long main_param_t; +#else +typedef unsigned long main_param_t; +#endif + +/** + * SPE entrypoint. + */ +int +main(main_param_t speid, main_param_t argp) +{ + int tag = 0; + + (void) speid; + + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); + + one_time_init(); + spu_command_init(); + + D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); + + /* get initialization data */ + mfc_get(&spu.init, /* dest */ + (unsigned int) argp, /* src */ + sizeof(struct cell_init_info), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + if (spu.init.id == 0) { + return_function_info(); + } + +#if 0 + if (spu.init.id==0) + spu_test_misc(spu.init.id); +#endif + + command_loop(); + + spu_command_close(); + + return 0; +} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h new file mode 100644 index 0000000000..33767e7c51 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -0,0 +1,254 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_MAIN_H +#define SPU_MAIN_H + + +#include <spu_mfcio.h> + +#include "cell/common.h" +#include "draw/draw_vertex.h" +#include "pipe/p_state.h" + + +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#else +#define D_PRINTF(...) +#endif + + +/** + * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. + * The data may be addressed through several different types. 
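+ * (scalar ushort/uint views or SPU vector views of the same pixel block)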
+ */ +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + +/** Function for sampling textures */ +typedef void (*spu_sample_texture_2d_func)(vector float s, + vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + + +/** Function for performing per-fragment ops */ +typedef void (*spu_fragment_ops_func)(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); + +/** Function for running fragment program */ +typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); + + +struct spu_framebuffer +{ + void *color_start; /**< addr of color surface in main memory */ + void *depth_start; /**< addr of depth surface in main memory */ + enum pipe_format color_format; + enum pipe_format depth_format; + uint width, height; /**< size in pixels */ + uint width_tiles, height_tiles; /**< width and height in tiles */ + + uint color_clear_value; + uint depth_clear_value; + + uint zsize; /**< 0, 2 or 4 bytes per Z */ + float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ +} ALIGN16_ATTRIB; + + +/** per-texture level info */ +struct spu_texture_level +{ + void *start; + ushort width, height, depth; + ushort tiles_per_row; + uint bytes_per_image; + /** texcoord scale factors */ + vector float scale_s, scale_t, scale_r; + /** texcoord masks (if REPEAT then size-1, else ~0) */ + vector signed int mask_s, mask_t, mask_r; + /** texcoord clamp limits */ + vector signed int max_s, max_t, max_r; +} ALIGN16_ATTRIB; + + +struct spu_texture +{ + struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; + uint max_level; + uint target; /**< PIPE_TEXTURE_x */ +} ALIGN16_ATTRIB; + + +/** + * All SPU global/context state will be in a singleton object of this type: + */ +struct spu_global +{ + /** One-time init/constant info */ + struct cell_init_info init; + + /* + * Current state + */ + struct spu_framebuffer fb; + struct pipe_depth_stencil_alpha_state depth_stencil_alpha; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct pipe_rasterizer_state rasterizer; + struct spu_texture texture[PIPE_MAX_SAMPLERS]; + struct vertex_info vertex_info; + + /** Current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Read depth/stencil tiles? 
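(i.e. depth or stencil testing needs the existing tile contents)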
*/ + boolean read_depth_stencil; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + + /** Current fragment ops machine code, at 8-byte boundary */ + uint *fragment_ops_code; + uint fragment_ops_code_size; + /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ + spu_fragment_ops_func fragment_ops[2]; + + /** Current fragment program machine code, at 8-byte boundary */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + /** Current fragment ops function */ + spu_fragment_program_func fragment_program; + + /** Current texture sampler function */ + spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; + + /** Fragment program constants */ + vector float constants[4 * CELL_MAX_CONSTANTS]; + +} ALIGN16_ATTRIB; + + +extern struct spu_global spu; + + + +/* DMA TAGS */ + +#define TAG_SURFACE_CLEAR 10 +#define TAG_VERTEX_BUFFER 11 +#define TAG_READ_TILE_COLOR 12 +#define TAG_READ_TILE_Z 13 +#define TAG_WRITE_TILE_COLOR 14 +#define TAG_WRITE_TILE_Z 15 +#define TAG_INDEX_BUFFER 16 +#define TAG_BATCH_BUFFER 17 +#define TAG_MISC 18 +#define TAG_DCACHE0 20 +#define TAG_DCACHE1 21 +#define TAG_DCACHE2 22 +#define TAG_DCACHE3 23 +#define TAG_FENCE 24 + + +static INLINE void +wait_on_mask(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_any(); +} + + +static INLINE void +wait_on_mask_all(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_all(); +} + + + + + +static INLINE void +memset16(ushort *d, ushort value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +static INLINE void +memset32(uint *d, uint value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c new file mode 100644 index 0000000000..683664e8a4 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -0,0 +1,631 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \author Brian Paul + */ + + +#include <transpose_matrix4x4.h> +#include "pipe/p_format.h" +#include "spu_main.h" +#include "spu_colorpack.h" +#include "spu_per_fragment_op.h" + + +#define LINEAR_QUAD_LAYOUT 1 + + +static INLINE vector float +spu_min(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(a, b, m); +} + + +static INLINE vector float +spu_max(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(b, a, m); +} + + +/** + * Called by rasterizer for each quad after the shader has run. Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops. This is a + * fallback/debug function. In reality we'll use a generated function + * produced by the PPU. But this function is useful for + * debug/validation. + */ +void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragR, + vector float fragG, + vector float fragB, + vector float fragA, + vector unsigned int mask) +{ + vector float frag_aos[4]; + unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ + unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ + + /* + * Do alpha test + */ + if (spu.depth_stencil_alpha.alpha.enabled) { + vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); + vector unsigned int amask; + + switch (spu.depth_stencil_alpha.alpha.func) { + case PIPE_FUNC_LESS: + amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ + break; + case PIPE_FUNC_GREATER: + amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ + break; + case PIPE_FUNC_GEQUAL: + amask = spu_cmpgt(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_LEQUAL: + amask = spu_cmpgt(fragA, ref); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_EQUAL: + amask = spu_cmpeq(ref, fragA); + break; + case PIPE_FUNC_NOTEQUAL: + amask = spu_cmpeq(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_ALWAYS: + amask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + amask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, amask); + } + + + /* + * Z and/or stencil testing... 
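+ * Note: the stencil branch below only asserts the surface format; actual
+ * stencil testing is not implemented in this fallback.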
+ */ + if (spu.depth_stencil_alpha.depth.enabled || + spu.depth_stencil_alpha.stencil[0].enabled) { + + /* get four Z/Stencil values from tile */ + vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); + vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; + vector unsigned int ifbZ = spu_and(ifbZS, mask24); + vector unsigned int ifbS = spu_andc(ifbZS, mask24); + + if (spu.depth_stencil_alpha.stencil[0].enabled) { + /* do stencil test */ + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + + } + else if (spu.depth_stencil_alpha.depth.enabled) { + /* do depth test */ + + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || + spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + + vector unsigned int ifragZ; + vector unsigned int zmask; + + /* convert four fragZ from float to uint */ + fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); + ifragZ = spu_convtu(fragZ, 0); + + /* do depth comparison, setting zmask with results */ + switch (spu.depth_stencil_alpha.depth.func) { + case PIPE_FUNC_LESS: + zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ + break; + case PIPE_FUNC_GREATER: + zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ + break; + case PIPE_FUNC_GEQUAL: + zmask = spu_cmpgt(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_LEQUAL: + zmask = spu_cmpgt(ifragZ, ifbZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_EQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + break; + case PIPE_FUNC_NOTEQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_ALWAYS: + zmask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + zmask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, zmask); + + /* merge framebuffer Z and fragment Z according to the mask */ + ifbZ = spu_or(spu_and(ifragZ, mask), + spu_andc(ifbZ, mask)); + } + + if (spu_extract(spu_orx(mask), 0)) { + /* put new fragment Z/Stencil values back into Z/Stencil tile */ + depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); + + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + } + + + /* + * If we'll need the current framebuffer/tile colors for blending + * or logicop or colormask, fetch them now. 
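+ * Pixel addressing within the tile depends on LINEAR_QUAD_LAYOUT; see the
+ * quad-layout diagrams at the end of this function.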
+ */ + if (spu.blend.blend_enable || + spu.blend.logicop_enable || + spu.blend.colormask != 0xf) { + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + fbc0 = colorTile->ui[y][x*2+0]; + fbc1 = colorTile->ui[y][x*2+1]; + fbc2 = colorTile->ui[y][x*2+2]; + fbc3 = colorTile->ui[y][x*2+3]; +#else + fbc0 = colorTile->ui[y+0][x+0]; + fbc1 = colorTile->ui[y+0][x+1]; + fbc2 = colorTile->ui[y+1][x+0]; + fbc3 = colorTile->ui[y+1][x+1]; +#endif + } + + + /* + * Do blending + */ + if (spu.blend.blend_enable) { + /* blending terms, misc regs */ + vector float term1r, term1g, term1b, term1a; + vector float term2r, term2g, term2b, term2a; + vector float one, tmp; + + vector float fbRGBA[4]; /* current framebuffer colors */ + + /* convert framebuffer colors from packed int to vector float */ + { + vector float temp[4]; /* float colors in AOS form */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + temp[0] = spu_unpack_B8G8R8A8(fbc0); + temp[1] = spu_unpack_B8G8R8A8(fbc1); + temp[2] = spu_unpack_B8G8R8A8(fbc2); + temp[3] = spu_unpack_B8G8R8A8(fbc3); + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + temp[0] = spu_unpack_A8R8G8B8(fbc0); + temp[1] = spu_unpack_A8R8G8B8(fbc1); + temp[2] = spu_unpack_A8R8G8B8(fbc2); + temp[3] = spu_unpack_A8R8G8B8(fbc3); + break; + default: + ASSERT(0); + } + _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ + } + + /* + * Compute Src RGB terms (fragment color * factor) + */ + switch (spu.blend.rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1r = fragR; + term1g = fragG; + term1b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term1r = + term1g = + term1b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1r = spu_mul(fragR, fragR); + term1g = spu_mul(fragG, fragG); + term1b = spu_mul(fragB, fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1r = spu_mul(fragR, fragA); + term1g = spu_mul(fragG, fragA); + term1b = spu_mul(fragB, fragA); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + term1r = spu_mul(fragR, fbRGBA[0]); + term1g = spu_mul(fragG, fbRGBA[1]); + term1b = spu_mul(fragB, fbRGBA[1]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term1r = spu_mul(fragR, fbRGBA[3]); + term1g = spu_mul(fragG, fbRGBA[3]); + term1b = spu_mul(fragB, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term (fragment alpha * factor) + */ + switch (spu.blend.alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1a = spu_mul(fragA, fragA); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + term1a = spu_mul(fragA, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms (framebuffer color * factor) + */ + 
switch (spu.blend.rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2r = fbRGBA[0]; + term2g = fbRGBA[1]; + term2b = fbRGBA[2]; + break; + case PIPE_BLENDFACTOR_ZERO: + term2r = + term2g = + term2b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2r = spu_mul(fbRGBA[0], fragR); + term2g = spu_mul(fbRGBA[1], fragG); + term2b = spu_mul(fbRGBA[2], fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2r = spu_mul(fbRGBA[0], fragA); + term2g = spu_mul(fbRGBA[1], fragA); + term2b = spu_mul(fbRGBA[2], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2r = spu_mul(fbRGBA[0], tmp); + term2g = spu_mul(fbRGBA[1], tmp); + term2b = spu_mul(fbRGBA[2], tmp); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + term2r = spu_mul(fbRGBA[0], fbRGBA[0]); + term2g = spu_mul(fbRGBA[1], fbRGBA[1]); + term2b = spu_mul(fbRGBA[2], fbRGBA[2]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term2r = spu_mul(fbRGBA[0], fbRGBA[3]); + term2g = spu_mul(fbRGBA[1], fbRGBA[3]); + term2b = spu_mul(fbRGBA[2], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term (framebuffer alpha * factor) + */ + switch (spu.blend.alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2a = fbRGBA[3]; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2a = spu_mul(fbRGBA[3], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2a = spu_mul(fbRGBA[3], tmp); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + term2a = spu_mul(fbRGBA[3], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (spu.blend.rgb_func) { + case PIPE_BLEND_ADD: + fragR = spu_add(term1r, term2r); + fragG = spu_add(term1g, term2g); + fragB = spu_add(term1b, term2b); + break; + case PIPE_BLEND_SUBTRACT: + fragR = spu_sub(term1r, term2r); + fragG = spu_sub(term1g, term2g); + fragB = spu_sub(term1b, term2b); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragR = spu_sub(term2r, term1r); + fragG = spu_sub(term2g, term1g); + fragB = spu_sub(term2b, term1b); + break; + case PIPE_BLEND_MIN: + fragR = spu_min(term1r, term2r); + fragG = spu_min(term1g, term2g); + fragB = spu_min(term1b, term2b); + break; + case PIPE_BLEND_MAX: + fragR = spu_max(term1r, term2r); + fragG = spu_max(term1g, term2g); + fragB = spu_max(term1b, term2b); + break; + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (spu.blend.alpha_func) { + case PIPE_BLEND_ADD: + fragA = spu_add(term1a, term2a); + break; + case PIPE_BLEND_SUBTRACT: + fragA = spu_sub(term1a, term2a); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragA = 
spu_sub(term2a, term1a); + break; + case PIPE_BLEND_MIN: + fragA = spu_min(term1a, term2a); + break; + case PIPE_BLEND_MAX: + fragA = spu_max(term1a, term2a); + break; + default: + ASSERT(0); + } + } + + + /* + * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. + */ +#if 0 + /* original code */ + { + vector float frag_soa[4]; + frag_soa[0] = fragR; + frag_soa[1] = fragG; + frag_soa[2] = fragB; + frag_soa[3] = fragA; + _transpose_matrix4x4(frag_aos, frag_soa); + } +#else + /* short-cut relying on function parameter layout: */ + _transpose_matrix4x4(frag_aos, &fragR); + (void) fragG; + (void) fragB; +#endif + + /* + * Pack fragment float colors into 32-bit RGBA words. + */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); + fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); + fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); + fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); + fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); + fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); + fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); + break; + default: + fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); + ASSERT(0); + } + + + /* + * Do color masking + */ + if (spu.blend.colormask != 0xf) { + uint cmask = 0x0; /* each byte corresponds to a color channel */ + + /* Form bitmask depending on color buffer format and colormask bits */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + if (spu.blend.colormask & PIPE_MASK_R) + cmask |= 0x00ff0000; /* red */ + if (spu.blend.colormask & PIPE_MASK_G) + cmask |= 0x0000ff00; /* green */ + if (spu.blend.colormask & PIPE_MASK_B) + cmask |= 0x000000ff; /* blue */ + if (spu.blend.colormask & PIPE_MASK_A) + cmask |= 0xff000000; /* alpha */ + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + if (spu.blend.colormask & PIPE_MASK_R) + cmask |= 0x0000ff00; /* red */ + if (spu.blend.colormask & PIPE_MASK_G) + cmask |= 0x00ff0000; /* green */ + if (spu.blend.colormask & PIPE_MASK_B) + cmask |= 0xff000000; /* blue */ + if (spu.blend.colormask & PIPE_MASK_A) + cmask |= 0x000000ff; /* alpha */ + break; + default: + ASSERT(0); + } + + /* + * Apply color mask to the 32-bit packed colors. + * if (cmask[i]) + * frag color[i] = frag color[i]; + * else + * frag color[i] = framebuffer color[i]; + */ + fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); + fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); + fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); + fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); + } + + + /* + * Do logic ops + */ + if (spu.blend.logicop_enable) { + /* XXX to do */ + /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ + } + + + /* + * If mask is non-zero, mark tile as dirty. + */ + if (spu_extract(spu_orx(mask), 0)) { + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + else { + /* write no fragments */ + return; + } + + + /* + * Write new fragment/quad colors to the framebuffer/tile. + * Only write pixels where the corresponding mask word is set. + */ +#if LINEAR_QUAD_LAYOUT + /* + * Quad layout: + * +--+--+--+--+ + * |p0|p1|p2|p3|... + * +--+--+--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y][x*2] = fragc0; + if (spu_extract(mask, 1)) + colorTile->ui[y][x*2+1] = fragc1; + if (spu_extract(mask, 2)) + colorTile->ui[y][x*2+2] = fragc2; + if (spu_extract(mask, 3)) + colorTile->ui[y][x*2+3] = fragc3; +#else + /* + * Quad layout: + * +--+--+ + * |p0|p1|... + * +--+--+ + * |p2|p3|... 
+ * +--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y+0][x+0] = fragc0; + if (spu_extract(mask, 1)) + colorTile->ui[y+0][x+1] = fragc1; + if (spu_extract(mask, 2)) + colorTile->ui[y+1][x+0] = fragc2; + if (spu_extract(mask, 3)) + colorTile->ui[y+1][x+1] = fragc3; +#endif +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h new file mode 100644 index 0000000000..f817abf046 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -0,0 +1,44 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_PER_FRAGMENT_OP +#define SPU_PER_FRAGMENT_OP + + +extern void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); + + +#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 0000000000..7c225e2f27 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,295 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <libmisc.h> +#include <spu_mfcio.h> + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "cell/common.h" +#include "util/u_memory.h" + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. 
%u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.read_depth_stencil) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. 
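+ * put_cz_tiles() above queued the DMA puts with TAG_WRITE_TILE_COLOR and
+ * TAG_WRITE_TILE_Z, so waiting on those same tag groups here is enough to
+ * know the writes to main memory have completed.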
+ */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.read_depth_stencil) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + uint num_tiles; + + D_PRINTF(CELL_DEBUG_CMD, + "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + num_tiles = 0; + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + num_tiles++; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z 
tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... */ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + D_PRINTF(CELL_DEBUG_CMD, + "RENDER done (%u tiles hit)\n", + num_tiles); +} diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h new file mode 100644 index 0000000000..493434f087 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h new file mode 100644 index 0000000000..74f2a0b6d2 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_shuffle.h @@ -0,0 +1,186 @@ +#ifndef SPU_SHUFFLE_H +#define SPU_SHUFFLE_H + +/* + * Generate shuffle patterns with minimal fuss. + * + * Based on ideas from + * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf + * + * A-P indicates 0-15th position in first vector + * a-p indicates 0-15th position in second vector + * + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | A| B| C| D| E| F| G| H| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * + * x or X indicates 0xff + * 8 indicates 0x80 + * 0 indicates 0x00 + * + * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector + * unsigned char literal suitable for use with spu_shuffle(). + * + * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector + * literal suitable for use with si_shufb(). 
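+ *
+ * The x/X, 0 and 8 tokens rely on the SPU shuffle-control byte rules:
+ * a control byte of the form 10xxxxxx produces 0x00, 110xxxxx produces
+ * 0xff and 111xxxxx produces 0x80, which is why the patterns below use
+ * 0x80, 0xc0 and 0xe0 for them.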
+ * + * + * For example : + * SHUFB4(A,A,A,A) + * expands to : + * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) + * + * SHUFFLE8(A,B,a,b,C,c,8,8) + * expands to : + * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, + * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) + * + */ + +#include <spu_intrinsics.h> + +#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03 +#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07 +#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b +#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f +#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13 +#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17 +#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b +#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f +#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 +#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 + +#define SHUFFLE_VECTOR_4__(A, B, C, D) \ + SHUFFLE_PATTERN_4_##A##__, \ + SHUFFLE_PATTERN_4_##B##__, \ + SHUFFLE_PATTERN_4_##C##__, \ + SHUFFLE_PATTERN_4_##D##__ + +#define SHUFFLE4(A, B, C, D) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_4__(A, B, C, D) \ + }) + +#define SHUFB4(A, B, C, D) \ + ((const qword){ \ + SHUFFLE_VECTOR_4__(A, B, C, D) \ + }) + + +#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01 +#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03 +#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05 +#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07 +#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09 +#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b +#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d +#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f +#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11 +#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13 +#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15 +#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17 +#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19 +#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b +#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d +#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f +#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 +#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 + + +#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + SHUFFLE_PATTERN_8_##A##__, \ + SHUFFLE_PATTERN_8_##B##__, \ + SHUFFLE_PATTERN_8_##C##__, \ + SHUFFLE_PATTERN_8_##D##__, \ + SHUFFLE_PATTERN_8_##E##__, \ + SHUFFLE_PATTERN_8_##F##__, \ + SHUFFLE_PATTERN_8_##G##__, \ + SHUFFLE_PATTERN_8_##H##__ + +#define SHUFFLE8(A, B, C, D, E, F, G, H) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + }) + +#define SHUFB8(A, B, C, D, E, F, G, H) \ + ((const qword){ \ + SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + }) + + +#define SHUFFLE_PATTERN_16_A__ 0x00 +#define SHUFFLE_PATTERN_16_B__ 0x01 +#define SHUFFLE_PATTERN_16_C__ 0x02 +#define SHUFFLE_PATTERN_16_D__ 0x03 +#define SHUFFLE_PATTERN_16_E__ 0x04 +#define SHUFFLE_PATTERN_16_F__ 0x05 +#define SHUFFLE_PATTERN_16_G__ 0x06 +#define SHUFFLE_PATTERN_16_H__ 0x07 +#define SHUFFLE_PATTERN_16_I__ 0x08 +#define SHUFFLE_PATTERN_16_J__ 0x09 +#define SHUFFLE_PATTERN_16_K__ 0x0a +#define SHUFFLE_PATTERN_16_L__ 0x0b +#define SHUFFLE_PATTERN_16_M__ 0x0c +#define SHUFFLE_PATTERN_16_N__ 0x0d +#define SHUFFLE_PATTERN_16_O__ 0x0e +#define SHUFFLE_PATTERN_16_P__ 0x0f +#define SHUFFLE_PATTERN_16_a__ 0x10 +#define SHUFFLE_PATTERN_16_b__ 0x11 +#define SHUFFLE_PATTERN_16_c__ 0x12 +#define SHUFFLE_PATTERN_16_d__ 0x13 +#define SHUFFLE_PATTERN_16_e__ 0x14 +#define SHUFFLE_PATTERN_16_f__ 
0x15 +#define SHUFFLE_PATTERN_16_g__ 0x16 +#define SHUFFLE_PATTERN_16_h__ 0x17 +#define SHUFFLE_PATTERN_16_i__ 0x18 +#define SHUFFLE_PATTERN_16_j__ 0x19 +#define SHUFFLE_PATTERN_16_k__ 0x1a +#define SHUFFLE_PATTERN_16_l__ 0x1b +#define SHUFFLE_PATTERN_16_m__ 0x1c +#define SHUFFLE_PATTERN_16_n__ 0x1d +#define SHUFFLE_PATTERN_16_o__ 0x1e +#define SHUFFLE_PATTERN_16_p__ 0x1f +#define SHUFFLE_PATTERN_16_X__ 0xc0 +#define SHUFFLE_PATTERN_16_x__ 0xc0 +#define SHUFFLE_PATTERN_16_0__ 0x80 +#define SHUFFLE_PATTERN_16_8__ 0xe0 + +#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + SHUFFLE_PATTERN_16_##A##__, \ + SHUFFLE_PATTERN_16_##B##__, \ + SHUFFLE_PATTERN_16_##C##__, \ + SHUFFLE_PATTERN_16_##D##__, \ + SHUFFLE_PATTERN_16_##E##__, \ + SHUFFLE_PATTERN_16_##F##__, \ + SHUFFLE_PATTERN_16_##G##__, \ + SHUFFLE_PATTERN_16_##H##__, \ + SHUFFLE_PATTERN_16_##I##__, \ + SHUFFLE_PATTERN_16_##J##__, \ + SHUFFLE_PATTERN_16_##K##__, \ + SHUFFLE_PATTERN_16_##L##__, \ + SHUFFLE_PATTERN_16_##M##__, \ + SHUFFLE_PATTERN_16_##N##__, \ + SHUFFLE_PATTERN_16_##O##__, \ + SHUFFLE_PATTERN_16_##P##__ + +#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + }) + +#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + ((const qword){ \ + SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + }) + +#endif diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c new file mode 100644 index 0000000000..69784c8978 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -0,0 +1,641 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <math.h> + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" +#include "spu_dcache.h" + + +/** + * Mark all tex cache entries as invalid. 
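+ * A rough sketch of what happens: every mipmap level of texture unit 0
+ * is walked, its size in bytes computed (times six faces for cube maps,
+ * or times depth for 3D textures), and that address range is marked
+ * dirty in the SPU data cache so later texel fetches re-read it from
+ * main memory.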
+ */ +void +invalidate_tex_cache(void) +{ + uint lvl; + for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { + uint unit = 0; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; + + if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) + bytes *= 6; + else if (spu.texture[unit].target == PIPE_TEXTURE_3D) + bytes *= spu.texture[unit].level[lvl].depth; + + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + } +} + + +/** + * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + * + * NOTE: in the typical case of bilinear filtering, the four texels + * are in a 2x2 group so we could get by with just two dcache fetches + * (two side-by-side texels per fetch). But when bilinear filtering + * wraps around a texture edge, we'll probably need code like we have + * now. + * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, + * it's quite likely that the four pixels in a quad will need some of the + * same texels. So look into doing texture fetches for four pixels at + * a time. + */ +static void +get_four_texels(const struct spu_texture_level *tlevel, uint face, + vec_int4 x, vec_int4 y, + vec_uint4 *texels) +{ + unsigned texture_ea = (uintptr_t) tlevel->start; + const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ + const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ + + const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); + const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); + + qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); + tile_offset = si_mpy((qword) tile_offset, tile_size); + + qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); + texel_offset = si_mpyui(texel_offset, 4); + + vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + + texture_ea = texture_ea + face * tlevel->bytes_per_image; + + spu_dcache_fetch_unaligned((qword *) & texels[0], + texture_ea + spu_extract(offset, 0), 4); + spu_dcache_fetch_unaligned((qword *) & texels[1], + texture_ea + spu_extract(offset, 1), 4); + spu_dcache_fetch_unaligned((qword *) & texels[2], + texture_ea + spu_extract(offset, 2), 4); + spu_dcache_fetch_unaligned((qword *) & texels[3], + texture_ea + spu_extract(offset, 3), 4); +} + + +/** clamp vec to [0, max] */ +static INLINE vector signed int +spu_clamp(vector signed int vec, vector signed int max) +{ + static const vector signed int zero = {0,0,0,0}; + vector unsigned int c; + c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ + vec = spu_sel(zero, vec, c); + c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ + vec = spu_sel(vec, max, c); + return vec; +} + + + +/** + * Do nearest texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
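+ * Texcoords are scaled to texel coords, wrapped/clamped, and the texels
+ * are fetched by get_four_texels() above, which splits each (x, y) into
+ * a 32x32-tile index plus an offset within the tile before hitting the
+ * SPU data cache.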
+ */ +void +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) +{ + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + vector float ss = spu_mul(s, tlevel->scale_s); + vector float tt = spu_mul(t, tlevel->scale_t); + vector signed int is = spu_convts(ss, 0); + vector signed int it = spu_convts(tt, 0); + vec_uint4 texels[4]; + + /* PIPE_TEX_WRAP_REPEAT */ + is = spu_and(is, tlevel->mask_s); + it = spu_and(it, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is = spu_clamp(is, tlevel->max_s); + it = spu_clamp(it, tlevel->max_t); + + get_four_texels(tlevel, face, is, it, texels); + + /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ + spu_unpack_A8R8G8B8_transpose4(texels, colors); +} + + +/** + * Do bilinear texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). + */ +void +sample_texture_2d_bilinear(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) +{ + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); + + vector signed int is0 = spu_convts(ss, 0); + vector signed int it0 = spu_convts(tt, 0); + + /* is + 1, it + 1 */ + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); + + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); + + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ + + /* convert packed int texels to float colors */ + vector float ftexels[16]; + spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); + spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); + spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); + spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. 
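+ * E.g. ss = 5.3: 5.3 * 1024 = 5427.2 -> 5427, 5427 & 1023 = 307, and
+ * 307 / 1024 ~= 0.2998, i.e. roughly the fractional part of the
+ * coordinate, which becomes the bilinear weight below.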
+ */ + vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); + vector signed int iss1024 = spu_convts(ss1024, 0); + iss1024 = spu_and(iss1024, 1023); + vector float sWeights0 = spu_convtf(iss1024, 10); + + vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); + vector signed int itt1024 = spu_convts(tt1024, 0); + itt1024 = spu_and(itt1024, 1023); + vector float tWeights0 = spu_convtf(itt1024, 10); + + /* 1 - sWeight and 1 - tWeight */ + vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); + vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); + + /* reds, for four pixels */ + ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), + spu_add(ftexels[8], ftexels[12])); + + /* greens, for four pixels */ + ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), + spu_add(ftexels[9], ftexels[13])); + + /* blues, for four pixels */ + ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), + spu_add(ftexels[10], ftexels[14])); + + /* alphas, for four pixels */ + ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), + spu_add(ftexels[11], ftexels[15])); +} + + + +/** + * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h + */ +static INLINE void +transpose(vector unsigned int *mOut0, + vector unsigned int *mOut1, + vector unsigned int *mOut2, + vector unsigned int *mOut3, + vector unsigned int *mIn) +{ + vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ + vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ + vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */ + + vector unsigned char shufflehi = ((vector unsigned char) { + 0x00, 0x01, 0x02, 0x03, + 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, + 0x14, 0x15, 0x16, 0x17}); + vector unsigned char shufflelo = ((vector unsigned char) { + 0x08, 0x09, 0x0A, 0x0B, + 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x1C, 0x1D, 0x1E, 0x1F}); + abcd = *(mIn+0); + efgh = *(mIn+1); + ijkl = *(mIn+2); + mnop = *(mIn+3); + + aibj = spu_shuffle(abcd, ijkl, shufflehi); + ckdl = spu_shuffle(abcd, ijkl, shufflelo); + emfn = spu_shuffle(efgh, mnop, shufflehi); + gohp = spu_shuffle(efgh, mnop, shufflelo); + + aeim = spu_shuffle(aibj, emfn, shufflehi); + bfjn = spu_shuffle(aibj, emfn, shufflelo); + cgko = spu_shuffle(ckdl, gohp, shufflehi); + dhlp = spu_shuffle(ckdl, gohp, shufflelo); + + *mOut0 = 
aeim; + *mOut1 = bfjn; + *mOut2 = cgko; + *mOut3 = dhlp; +} + + +/** + * Bilinear filtering, using int instead of float arithmetic for computing + * sample weights. + */ +void +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) +{ + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + + /* Scale texcoords by size of texture, and add half pixel bias */ + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); + + /* convert float coords to fixed-pt coords with 7 fraction bits */ + vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */ + vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */ + + /* compute integer texel weights in [0, 127] */ + vector signed int sWeights0 = spu_and(is, 127); + vector signed int tWeights0 = spu_and(it, 127); + vector signed int sWeights1 = spu_sub(127, sWeights0); + vector signed int tWeights1 = spu_sub(127, tWeights0); + + /* texel coords: is0 = is / 128, it0 = is / 128 */ + vector signed int is0 = spu_rlmask(is, -7); + vector signed int it0 = spu_rlmask(it, -7); + + /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); + + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); + + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ + + /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ + { + static const unsigned char ZERO = 0x80; + int i; + for (i = 0; i < 16; i++) { + texels[i] = spu_shuffle(texels[i], texels[i], + ((vector unsigned char) { + ZERO, ZERO, ZERO, 1, + ZERO, ZERO, ZERO, 2, + ZERO, ZERO, ZERO, 3, + ZERO, ZERO, ZERO, 0})); + } + } + + /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ + vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, + texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; + transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); + transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); + transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); + transpose(&texel12, &texel13, &texel14, &texel15, texels + 12); + + /* computed weighted colors */ + vector unsigned int c0, c1, c2, c3, cSum; + + /* red */ + c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[0] = spu_convtf(cSum, 22); + + 
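+ /* Note on the 22-bit scale: each 8-bit texel channel is weighted by two
+ * 7-bit weights (0..127) whose four corner products sum to 127*127, so
+ * cSum fits in about 22 bits and spu_convtf(cSum, 22) maps it back to a
+ * float in roughly [0, 0.98].
+ */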
/* green */ + c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[1] = spu_convtf(cSum, 22); + + /* blue */ + c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[2] = spu_convtf(cSum, 22); + + /* alpha */ + c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[3] = spu_convtf(cSum, 22); +} + + + +/** + * Compute level of detail factor from texcoords. + */ +static INLINE float +compute_lambda_2d(uint unit, vector float s, vector float t) +{ + uint baseLevel = 0; + float width = spu.texture[unit].level[baseLevel].width; + float height = spu.texture[unit].level[baseLevel].width; + float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); + float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); + float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); + float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); +#if 0 + /* ideal value */ + float x = dsdx * dsdx + dtdx * dtdx; + float y = dsdy * dsdy + dtdy * dtdy; + float rho = x > y ? x : y; + rho = sqrtf(rho); +#else + /* approximation */ + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); + float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5; +#endif + float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */ + return lambda; +} + + +/** + * Blend two sets of colors according to weight. + */ +static void +blend_colors(vector float c0[4], const vector float c1[4], float weight) +{ + vector float t = spu_splats(weight); + vector float dc0 = spu_sub(c1[0], c0[0]); + vector float dc1 = spu_sub(c1[1], c0[1]); + vector float dc2 = spu_sub(c1[2], c0[2]); + vector float dc3 = spu_sub(c1[3], c0[3]); + c0[0] = spu_madd(dc0, t, c0[0]); + c0[1] = spu_madd(dc1, t, c0[1]); + c0[2] = spu_madd(dc2, t, c0[2]); + c0[3] = spu_madd(dc3, t, c0[3]); +} + + +/** + * Texture sampling with level of detail selection and possibly mipmap + * interpolation. + */ +void +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level_ignored, uint face, + vector float colors[4]) +{ + /* + * Note that we're computing a lambda/lod here that's used for all + * four pixels in the quad. 
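+ * lambda is log2 of the approximated texel-to-pixel ratio; e.g. if s
+ * changes by about 2.8 texels per pixel in x and t by about 2.8 per
+ * pixel in y, then rho ~= 2.8 and lambda ~= 1.5, so with linear min
+ * filtering level 1 is sampled and blended half-and-half with level 2.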
+ */ + float lambda = compute_lambda_2d(unit, s, t); + + (void) face; + (void) level_ignored; + + /* apply lod bias */ + lambda += spu.sampler[unit].lod_bias; + + /* clamp */ + if (lambda < spu.sampler[unit].min_lod) + lambda = spu.sampler[unit].min_lod; + else if (lambda > spu.sampler[unit].max_lod) + lambda = spu.sampler[unit].max_lod; + + if (lambda <= 0.0f) { + /* magnify */ + spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); + } + else { + /* minify */ + if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + /* sample two mipmap levels and interpolate */ + int level = (int) lambda; + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); + if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + /* sample second mipmap level */ + float weight = lambda - (float) level; + level++; + if (level <= (int) spu.texture[unit].max_level) { + vector float colors2[4]; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); + blend_colors(colors, colors2, weight); + } + } + } + else { + /* sample one mipmap level */ + int level = (int) (lambda + 0.5f); + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); + } + } +} + + +/** XXX need a SIMD version of this */ +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx); + const float ary = fabsf(ry); + const float arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = (sc / ma + 1.0F) * 0.5F; + *newT = (tc / ma + 1.0F) * 0.5F; + + return face; +} + + + +void +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]) +{ + uint p, faces[4], level = 0; + float newS[4], newT[4]; + + /* Compute cube faces referenced by the four sets of texcoords. + * XXX we should SIMD-ize this. + */ + for (p = 0; p < 4; p++) { + float rx = spu_extract(s, p); + float ry = spu_extract(t, p); + float rz = spu_extract(r, p); + faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); + } + + if (faces[0] == faces[1] && + faces[0] == faces[2] && + faces[0] == faces[3]) { + /* GOOD! 
All four texcoords refer to the same cube face */ + s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; + t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; + spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); + } + else { + /* BAD! The four texcoords refer to different faces */ + for (p = 0; p < 4; p++) { + vector float c[4]; + + spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), + unit, level, faces[p], c); + + float red = spu_extract(c[0], p); + float green = spu_extract(c[1], p); + float blue = spu_extract(c[2], p); + float alpha = spu_extract(c[3], p); + + colors[0] = spu_insert(red, colors[0], p); + colors[1] = spu_insert(green, colors[1], p); + colors[2] = spu_insert(blue, colors[2], p); + colors[3] = spu_insert(alpha, colors[3], p); + } + } +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h new file mode 100644 index 0000000000..7b75b007b5 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -0,0 +1,67 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef SPU_TEXTURE_H +#define SPU_TEXTURE_H + + +#include "pipe/p_compiler.h" + + +extern void +invalidate_tex_cache(void); + + +extern void +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + + +extern void +sample_texture_2d_bilinear(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + +extern void +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + + +extern void +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + + +extern void +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]); + + +#endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c new file mode 100644 index 0000000000..6905015a48 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -0,0 +1,126 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "spu_tile.h" +#include "spu_main.h" + + +/** + * Get tile of color or Z values from main memory, put into SPU memory. + */ +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + src += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("get_tile: dest: %p src: 0x%x size: %d\n", + tile, (unsigned int) src, bytesPerTile); + */ + mfc_get(tile->ui, /* dest in local memory */ + (unsigned int) src, /* src in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + + +/** + * Move tile of color or Z values from SPU memory to main memory. 
+ */
+void
+put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
+{
+ const uint offset = ty * spu.fb.width_tiles + tx;
+ const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+ ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+
+ dst += offset * bytesPerTile;
+
+ ASSERT(tx < spu.fb.width_tiles);
+ ASSERT(ty < spu.fb.height_tiles);
+ ASSERT_ALIGN16(tile);
+ /*
+ printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n",
+ spu.init.id,
+ tile, (unsigned int) dst, bytesPerTile);
+ */
+ mfc_put((void *) tile->ui, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ bytesPerTile,
+ tag,
+ 0, /* tid */
+ 0 /* rid */);
+}
+
+
+/**
+ * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
+ * tiles back to the main framebuffer.
+ */
+void
+really_clear_tiles(uint surfaceIndex)
+{
+ const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ uint i;
+
+ if (surfaceIndex == 0) {
+ clear_c_tile(&spu.ctile);
+
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+ }
+ }
+ }
+ else {
+ clear_z_tile(&spu.ztile);
+
+ for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+ uint tx = i % spu.fb.width_tiles;
+ uint ty = i / spu.fb.width_tiles;
+ if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+ }
+ }
+
+#if 0
+ wait_on_mask(1 << TAG_SURFACE_CLEAR);
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
new file mode 100644
index 0000000000..7bfb52be8f
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef SPU_TILE_H +#define SPU_TILE_H + + +#include <libmisc.h> +#include <spu_mfcio.h> +#include "spu_main.h" +#include "cell/common.h" + + + +extern void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); + +extern void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); + +extern void +really_clear_tiles(uint surfaceIndex); + + +static INLINE void +clear_c_tile(tile_t *ctile) +{ + memset32((uint*) ctile->ui, + spu.fb.color_clear_value, + TILE_SIZE * TILE_SIZE); +} + + +static INLINE void +clear_z_tile(tile_t *ztile) +{ + if (spu.fb.zsize == 2) { + memset16((ushort*) ztile->us, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } + else { + ASSERT(spu.fb.zsize != 0); + memset32((uint*) ztile->ui, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } +} + + +#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c new file mode 100644 index 0000000000..0d9fcb9997 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -0,0 +1,809 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Triangle rendering within a tile. 
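+ *
+ * Roughly: setup computes plane-equation coefficients (a0, dadx, dady)
+ * for each vertex attribute, walks the triangle edges a pair of scan
+ * lines at a time, and emits 2x2 quads of fragments which are shaded
+ * and then handed to the per-fragment ops for the current 32x32 tile.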
+ */ + +#include <transpose_matrix4x4.h> +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "util/u_math.h" +#include "spu_colorpack.h" +#include "spu_main.h" +#include "spu_shuffle.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_tri.h" + + +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ +typedef vector unsigned int mask_t; + + + +/** + * Simplified types taken from other parts of Gallium + */ +struct vertex_header { + vector float data[1]; +}; + + + +/* XXX fix this */ +#undef CEILF +#define CEILF(X) ((float) (int) ((X) + 0.99999f)) + + +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { + union { + struct { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + }; + vec_float4 ds; /**< vector accessor for dx and dy */ + }; + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +struct interp_coef +{ + vector float a0; + vector float dadx; + vector float dady; +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + union { + struct { + const struct vertex_header *vmin; + const struct vertex_header *vmid; + const struct vertex_header *vmax; + const struct vertex_header *vprovoke; + }; + qword vertex_headers; + }; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneOverArea; /* XXX maybe make into vector? */ + + uint facing; + + uint tx, ty; /**< position of current tile (x, y) */ + + int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; + + struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + + struct { + vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */ + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; +}; + + +static struct setup_stage setup; + + +/** + * Evaluate attribute coefficients (plane equations) to compute + * attribute values for the four fragments in a quad. + * Eg: four colors will be computed (in AoS format). 
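+ * For a linearly interpolated attribute the value at (x, y) is
+ * a0 + dadx * x + dady * y; the four quad values are then just the
+ * top-left value, +dadx, +dady and +dadx+dady. Perspective-correct
+ * attributes get an extra multiply by 1/w (via spu_re(w)).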
+ */ +static INLINE void +eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) +{ + switch (spu.vertex_info.attrib[slot].interp_mode) { + case INTERP_CONSTANT: + result[QUAD_TOP_LEFT] = + result[QUAD_TOP_RIGHT] = + result[QUAD_BOTTOM_LEFT] = + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0; + break; + case INTERP_LINEAR: + { + vector float dadx = setup.coef[slot].dadx; + vector float dady = setup.coef[slot].dady; + vector float topLeft = + spu_add(setup.coef[slot].a0, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + result[QUAD_TOP_LEFT] = topLeft; + result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); + result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); + result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); + } + break; + case INTERP_PERSPECTIVE: + { + vector float dadx = setup.coef[slot].dadx; + vector float dady = setup.coef[slot].dady; + vector float topLeft = + spu_add(setup.coef[slot].a0, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + vector float wInv = spu_re(w); /* 1.0 / w */ + + result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv); + result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv); + result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv); + result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv); + } + break; + case INTERP_POS: + case INTERP_NONE: + break; + default: + ASSERT(0); + } +} + + +/** + * As above, but return 4 vectors in SOA format. + * XXX this will all be re-written someday. + */ +static INLINE void +eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4]) +{ + eval_coeff(slot, x, y, w, result); + _transpose_matrix4x4(result, result); +} + + +/** Evalute coefficients to get Z for four pixels in a quad */ +static INLINE vector float +eval_z(float x, float y) +{ + const uint slot = 0; + const float dzdx = spu_extract(setup.coef[slot].dadx, 2); + const float dzdy = spu_extract(setup.coef[slot].dady, 2); + const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; + return spu_add(topLeftv, derivs); +} + + +/** Evalute coefficients to get W for four pixels in a quad */ +static INLINE vector float +eval_w(float x, float y) +{ + const uint slot = 0; + const float dwdx = spu_extract(setup.coef[slot].dadx, 3); + const float dwdy = spu_extract(setup.coef[slot].dady, 3); + const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; + return spu_add(topLeftv, derivs); +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + * Note: about 1/5 to 1/7 of the time, mask is zero and this function + * should be skipped. But adding the test for that slows things down + * overall. + */ +static INLINE void +emit_quad( int x, int y, mask_t mask) +{ + /* If any bits in mask are set... */ + if (spu_extract(spu_orx(mask), 0)) { + const int ix = x - setup.cliprect_minx; + const int iy = y - setup.cliprect_miny; + + spu.cur_ctile_status = TILE_STATUS_DIRTY; + spu.cur_ztile_status = TILE_STATUS_DIRTY; + + { + /* + * Run fragment shader, execute per-fragment ops, update fb/tile. 
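+ * The kill mask returned by the fragment program is cleared out of the quad's coverage mask before the per-fragment ops are applied.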
+ */ + vector float inputs[4*4], outputs[2*4]; + vector float fragZ = eval_z((float) x, (float) y); + vector float fragW = eval_w((float) x, (float) y); + vector unsigned int kill_mask; + + /* setup inputs */ +#if 0 + eval_coeff_soa(1, (float) x, (float) y, fragW, inputs); +#else + uint i; + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4); + } +#endif + ASSERT(spu.fragment_program); + ASSERT(spu.fragment_ops); + + /* Execute the current fragment program */ + kill_mask = spu.fragment_program(inputs, outputs, spu.constants); + + mask = spu_andc(mask, kill_mask); + + /* Execute per-fragment/quad operations, including: + * alpha test, z test, stencil test, blend and framebuffer writing. + * Note that there are two different fragment operations functions + * that can be called, one for front-facing fragments, and one + * for back-facing fragments. (Often the two are the same; + * but in some cases, like two-sided stenciling, they can be + * very different.) So choose the correct function depending + * on the calculated facing. + */ + spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, + fragZ, + outputs[0*4+0], + outputs[0*4+1], + outputs[0*4+2], + outputs[0*4+3], + mask); + } + } +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int +block(int x) +{ + return x & ~1; +} + + +/** + * Render a horizontal span of quads + */ +static void +flush_spans(void) +{ + int minleft, maxright; + + const int l0 = spu_extract(setup.span.quad, 0); + const int l1 = spu_extract(setup.span.quad, 1); + const int r0 = spu_extract(setup.span.quad, 2); + const int r1 = spu_extract(setup.span.quad, 3); + + switch (setup.span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(l0, l1); + maxright = MAX2(r0, r1); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = l0; + maxright = r0; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = l1; + maxright = r1; + break; + + default: + return; + } + + /* OK, we're very likely to need the tile data now. + * clear or finish waiting if needed. + */ + if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ctile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_COLOR); + spu.cur_ctile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_c_tile(&spu.ctile); + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); + + if (spu.read_depth_stencil) { + if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ztile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_Z); + spu.cur_ztile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_z_tile(&spu.ztile); + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); + } + + /* XXX this loop could be moved into the above switch cases... 
*/ + + /* Setup for mask calculation */ + const vec_int4 quad_LlRr = setup.span.quad; + const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); + const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); + const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); + + const vec_int4 twos = spu_splats(2); + + const int x = block(minleft); + vec_int4 xs = {x, x+1, x, x+1}; + + for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { + /** + * Computes mask to indicate which pixels in the 2x2 quad are actually + * inside the triangle's bounds. + */ + + /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ + const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); + const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); + + /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ + const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); + + /* Combine results to create mask */ + const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); + + emit_quad(spu_extract(xs, 0), setup.span.y, mask); + } + + setup.span.y = 0; + setup.span.y_flags = 0; + /* Zero right elements */ + setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); +} + + +#if DEBUG_VERTS +static void +print_vertex(const struct vertex_header *v) +{ + uint i; + fprintf(stderr, " Vertex: (%p)\n", v); + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + spu_extract(v->data[i], 0), + spu_extract(v->data[i], 1), + spu_extract(v->data[i], 2), + spu_extract(v->data[i], 3)); + } +} +#endif + + +/** + * Sort vertices from top to bottom. + * Compute area and determine front vs. back facing. + * Do coarse clip test against tile bounds + * \return FALSE if tri is totally outside tile, TRUE otherwise + */ +static boolean +setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) +{ + float area, sign; + +#if DEBUG_VERTS + if (spu.init.id==0) { + fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); + } +#endif + + /* determine bottom to top order of vertices */ + { + /* A table of shuffle patterns for putting vertex_header pointers into + correct order. Quite magical. */ + const vec_uchar16 sort_order_patterns[] = { + SHUFFLE4(A,B,C,C), + SHUFFLE4(C,A,B,C), + SHUFFLE4(A,C,B,C), + SHUFFLE4(B,C,A,C), + SHUFFLE4(B,A,C,C), + SHUFFLE4(C,B,A,C) }; + + /* The vertex_header pointers, packed for easy shuffling later */ + const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2}; + + /* Collate y values into two vectors for comparison. + Using only one shuffle constant! ;) */ + const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C)); + + /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ + const vec_uint4 compare = spu_cmpgt(y_012, y_120); + /* Compress the result of the comparison into 4 bits */ + const vec_uint4 gather = spu_gather(compare); + /* Subtract one to attain the index into the LUT. Magical. */ + const unsigned int index = spu_extract(gather, 0) - 1; + + /* Load the appropriate pattern and construct the desired vector. 
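+ (For a non-degenerate triangle the three y comparisons above can be neither all true nor all false, so 'index' ends up in the range 0..5 and selects one of the six possible vertex orderings.)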
*/ + setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]); + + /* Using the result of the comparison, set sign. + Very magical. */ + sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f); + } + + /* Check if triangle is completely outside the tile bounds */ + if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) + return FALSE; + if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) + return FALSE; + + setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); + setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); + setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + */ + area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; + + setup.oneOverArea = 1.0f / area; + + /* The product of area * sign indicates front/back orientation (0/1). + * Just in case someone gets the bright idea of switching the front + * and back constants without noticing that we're assuming their + * values in this operation, also assert that the values are + * what we think they are. + */ + ASSERT(CELL_FACING_FRONT == 0); + ASSERT(CELL_FACING_BACK == 1); + setup.facing = (area * sign > 0.0f) + ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot]. + * The result will be put into setup.coef[slot].a0. + * \param slot which attribute slot + */ +static INLINE void +const_coeff4(uint slot) +{ + setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0 = setup.vprovoke->data[slot]; +} + + +/** + * As above, but interp setup all four vector components. + */ +static INLINE void +tri_linear_coeff4(uint slot) +{ + const vector float vmin_d = setup.vmin->data[slot]; + const vector float vmid_d = setup.vmid->data[slot]; + const vector float vmax_d = setup.vmax->data[slot]; + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); + + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). 
+ * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void +tri_persp_coeff4(uint slot) +{ + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); + const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); + const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); + + vector float vmin_d = setup.vmin->data[slot]; + vector float vmid_d = setup.vmid->data[slot]; + vector float vmax_d = setup.vmax->data[slot]; + + vmin_d = spu_mul(vmin_d, vmin_w); + vmid_d = spu_mul(vmid_d, vmid_w); + vmax_d = spu_mul(vmax_d, vmax_w); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); + + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + +/** + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. + */ +static void +setup_tri_coefficients(void) +{ + uint i; + + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + switch (spu.vertex_info.attrib[i].interp_mode) { + case INTERP_NONE: + break; + case INTERP_CONSTANT: + const_coeff4(i); + break; + case INTERP_POS: + /* fall-through */ + case INTERP_LINEAR: + tri_linear_coeff4(i); + break; + case INTERP_PERSPECTIVE: + tri_persp_coeff4(i); + break; + default: + ASSERT(0); + } + } +} + + +static void +setup_tri_edges(void) +{ + float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; + float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + + float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; + float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + + setup.emaj.sy = CEILF(vmin_y); + setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); + setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; + setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + + setup.etop.sy = CEILF(vmid_y); + setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); + setup.etop.dxdy = setup.etop.dx / setup.etop.dy; + setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + + setup.ebot.sy = CEILF(vmin_y); + setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); + setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; + setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. 
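+ * For each scanline the left and right bounds are computed from the edge start positions and slopes (sx + y * dxdy), clamped to the cliprect, and accumulated into the current two-row span, which flush_spans() later turns into 2x2 quads.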
+ */ +static void +subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) +{ + const int minx = setup.cliprect_minx; + const int maxx = setup.cliprect_maxx; + const int miny = setup.cliprect_miny; + const int maxy = setup.cliprect_maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + ASSERT((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup.span.y) { + flush_spans(); + setup.span.y = block(_y); + } + + int offset = _y&1; + vec_int4 quad_LlRr = {left, left, right, right}; + /* Store left and right in 0 or 1 row of quad based on offset */ + setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset)); + setup.span.y_flags |= 1<<offset; + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. + */ +boolean +tri_draw(const float *v0, const float *v1, const float *v2, + uint tx, uint ty) +{ + setup.tx = tx; + setup.ty = ty; + + /* set clipping bounds to tile bounds */ + setup.cliprect_minx = tx * TILE_SIZE; + setup.cliprect_miny = ty * TILE_SIZE; + setup.cliprect_maxx = (tx + 1) * TILE_SIZE; + setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + + if (!setup_sort_vertices((struct vertex_header *) v0, + (struct vertex_header *) v1, + (struct vertex_header *) v2)) { + return FALSE; /* totally clipped */ + } + + setup_tri_coefficients(); + setup_tri_edges(); + + setup.span.y = 0; + setup.span.y_flags = 0; + /* Zero right elements */ + setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); + + if (setup.oneOverArea < 0.0) { + /* emaj on left */ + subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); + subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); + } + else { + /* emaj on right */ + subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); + subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); + } + + flush_spans(); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h new file mode 100644 index 0000000000..aa694dd7c9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_TRI_H +#define SPU_TRI_H + + +extern boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); + + +#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c new file mode 100644 index 0000000000..b8a0d4a265 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -0,0 +1,167 @@ + +#include "cell/common.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" +#include "tgsi/tgsi_parse.h" +//#include "tgsi_build.h" +#include "tgsi/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + ASSERT( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + ASSERT( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + &reg->SrcRegisterExtSwz, + component ); + + ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle.
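+ * (TGSI_EXTSWIZZLE_ZERO and TGSI_EXTSWIZZLE_ONE select the literal constants 0.0 and 1.0 rather than a source channel; the asserts above guarantee that they compare greater than TGSI_SWIZZLE_W, which is what the test below relies on.)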
+ */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + &reg->SrcRegister, + component ); + } + + return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + ASSERT( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + ASSERT( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c new file mode 100644 index 0000000000..03375d84a5 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,145 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" +#include "spu_dcache.h" + +typedef void (*spu_fetch_func)(qword *out, const qword *in, + const qword *shuffle_data); + + +static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { + /* Shuffle used by CVT_64_FLOAT + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED + */ + { + 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, + 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED + */ + { + 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, + 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, + }, + + /* High value shuffle used by trans4x4. + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 + }, + + /* Low value shuffle used by trans4x4. + */ + { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F + } +}; + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + ASSERT(count <= 4); + +#if DRAW_DBG + printf("SPU: %s count = %u, nr_attrs = %u\n", + __FUNCTION__, count, nr_attrs); +#endif + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; + const spu_fetch_func fetch = (spu_fetch_func) + (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); + unsigned i; + unsigned idx; + const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; + const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; + qword in[2 * 4] ALIGN16_ATTRIB; + + + /* Fetch four attributes for four vertices. + */ + idx = 0; + for (i = 0; i < count; i++) { + const uint64_t addr = src + (elts[i] * pitch); + +#if DRAW_DBG + printf("SPU: fetching = 0x%llx\n", addr); +#endif + + spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); + idx += quads_per_entry; + } + + /* Be nice and zero out any missing vertices. + */ + (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); + + + /* Convert all 4 vertices to vectors of float. + */ + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); + } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ + draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c new file mode 100644 index 0000000000..fbe5b34d39 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + * Ian Romanick <idr@us.ibm.com> + */ + +#include <spu_mfcio.h> + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "spu_main.h" + + +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +#define CLIP_RIGHT_BIT 0x01 +#define CLIP_LEFT_BIT 0x02 +#define CLIP_TOP_BIT 0x04 +#define CLIP_BOTTOM_BIT 0x08 +#define CLIP_FAR_BIT 0x10 +#define CLIP_NEAR_BIT 0x20 + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. 
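+ * Each output vertex_header is DMA'd in from main memory, filled in with the shaded results (clip coordinates and clip mask, window coordinates after the divide by w and viewport transform, plus the remaining attribute slots) and then DMA'd back out.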
+ * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +run_vertex_program(struct spu_vs_context *draw, + unsigned elts[4], unsigned count, + const uint64_t *vOut) +{ + struct spu_exec_machine *machine = &draw->machine; + unsigned int j; + + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + ASSERT(count <= 4); + + machine->Processor = TGSI_PROCESSOR_VERTEX; + + ASSERT_ALIGN16(draw->constants); + machine->Consts = (float (*)[4]) draw->constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + spu_vertex_fetch( draw, machine, elts, count ); + + /* run shader */ + spu_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + unsigned char buffer[sizeof(struct vertex_header) + + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + struct vertex_header *const tmpOut = + (struct vertex_header *) buffer; + const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) + + (sizeof(float) * 4 + * draw->num_vs_outputs)); + + mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, + draw->nr_planes); + tmpOut->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + tmpOut->data[0][0] = x * scale[0] + trans[0]; + tmpOut->data[0][1] = y * scale[1] + trans[1]; + tmpOut->data[0][2] = z * scale[2] + trans[2]; + tmpOut->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. 
+ */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + } /* loop over vertices */ +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] + ALIGN16_ATTRIB; + + +void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs) +{ + const unsigned immediate_addr = vs->immediates; + const unsigned immediate_size = + ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) + + (immediate_addr & 0x0f)); + + + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, + TAG_VERTEX_BUFFER, 0, 0); + + draw->machine.Instructions = (struct tgsi_full_instruction *) + vs->instructions; + draw->machine.NumInstructions = vs->num_instructions; + + draw->machine.Declarations = (struct tgsi_full_declaration *) + vs->declarations; + draw->machine.NumDeclarations = vs->num_declarations; + + draw->num_vs_outputs = vs->num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], + sizeof(float) * 4 * vs->num_immediates); +} + + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); + draw->nr_planes = vs->nr_planes; + draw->vertex_fetch.nr_attrs = vs->nr_attrs; + + for (i = 0; i < vs->num_elts; i += 4) { + const unsigned batch_size = MIN2(vs->num_elts - i, 4); + + run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); + } +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h new file mode 100644 index 0000000000..4c74f5e74d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -0,0 +1,66 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "cell/common.h" +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count ); + +struct spu_vs_context { + struct pipe_viewport_state viewport; + + struct { + uint64_t src_ptr[PIPE_MAX_ATTRIBS]; + unsigned pitch[PIPE_MAX_ATTRIBS]; + unsigned size[PIPE_MAX_ATTRIBS]; + unsigned code_offset[PIPE_MAX_ATTRIBS]; + unsigned nr_attrs; + boolean dirty; + + spu_full_fetch_func fetch_func; + void *code; + } vertex_fetch; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + struct spu_exec_machine machine; + const float (*constants)[4]; + + unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + if (draw->vertex_fetch.dirty) { + spu_update_vertex_fetch(draw); + draw->vertex_fetch.dirty = 0; + } + + (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct 
cell_shader_info *vs); + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile new file mode 100644 index 0000000000..f08b8df07a --- /dev/null +++ b/src/gallium/drivers/failover/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = failover + +C_SOURCES = \ + fo_state.c \ + fo_state_emit.c \ + fo_context.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript new file mode 100644 index 0000000000..f8e9b1b491 --- /dev/null +++ b/src/gallium/drivers/failover/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +failover = env.ConvenienceLibrary( + target = 'failover', + source = [ + 'fo_state.c', + 'fo_state_emit.c', + 'fo_context.c', + ]) + +Export('failover') diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c new file mode 100644 index 0000000000..10c4ffc209 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.c @@ -0,0 +1,159 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "util/u_memory.h" +#include "pipe/p_context.h" + +#include "fo_context.h" +#include "fo_winsys.h" + + + +static void failover_destroy( struct pipe_context *pipe ) +{ + struct failover_context *failover = failover_context( pipe ); + + free( failover ); +} + + + +static boolean failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct failover_context *failover = failover_context( pipe ); + + /* If there has been any statechange since last time, try hardware + * rendering again: + */ + if (failover->dirty) { + failover->mode = FO_HW; + } + + /* Try hardware: + */ + if (failover->mode == FO_HW) { + if (!failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count )) { + + failover->hw->flush( failover->hw, ~0, NULL ); + failover->mode = FO_SW; + } + } + + /* Possibly try software: + */ + if (failover->mode == FO_SW) { + + if (failover->dirty) + failover_state_emit( failover ); + + failover->sw->draw_elements( failover->sw, + indexBuffer, + indexSize, + prim, + start, + count ); + + /* Be ready to switch back to hardware rendering without an + * intervening flush. Unlikely to be much performance impact to + * this: + */ + failover->sw->flush( failover->sw, ~0, NULL ); + } + + return TRUE; +} + + +static boolean failover_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return failover_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ) +{ + struct failover_context *failover = CALLOC_STRUCT(failover_context); + if (failover == NULL) + return NULL; + + failover->hw = hw; + failover->sw = sw; + failover->pipe.winsys = hw->winsys; + failover->pipe.screen = hw->screen; + failover->pipe.destroy = failover_destroy; +#if 0 + failover->pipe.is_format_supported = hw->is_format_supported; + failover->pipe.get_name = hw->get_name; + failover->pipe.get_vendor = hw->get_vendor; + failover->pipe.get_param = hw->get_param; + failover->pipe.get_paramf = hw->get_paramf; +#endif + + failover->pipe.draw_arrays = failover_draw_arrays; + failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.clear = hw->clear; + + /* No software occlusion fallback (or other optional functionality) + * at this point - if the hardware doesn't support it, don't + * advertise it to the application. 
+ */ + failover->pipe.begin_query = hw->begin_query; + failover->pipe.end_query = hw->end_query; + + failover_init_state_functions( failover ); + + failover->pipe.surface_copy = hw->surface_copy; + failover->pipe.surface_fill = hw->surface_fill; + +#if 0 + failover->pipe.texture_create = hw->texture_create; + failover->pipe.texture_release = hw->texture_release; + failover->pipe.get_tex_surface = hw->get_tex_surface; + failover->pipe.texture_update = hw->texture_update; +#endif + + failover->pipe.flush = hw->flush; + + failover->dirty = 0; + + return &failover->pipe; +} + diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h new file mode 100644 index 0000000000..c6409fe1e1 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.h @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef FO_CONTEXT_H +#define FO_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + + +#define FO_NEW_VIEWPORT 0x1 +#define FO_NEW_RASTERIZER 0x2 +#define FO_NEW_FRAGMENT_SHADER 0x4 +#define FO_NEW_BLEND 0x8 +#define FO_NEW_CLIP 0x10 +#define FO_NEW_SCISSOR 0x20 +#define FO_NEW_STIPPLE 0x40 +#define FO_NEW_FRAMEBUFFER 0x80 +#define FO_NEW_ALPHA_TEST 0x100 +#define FO_NEW_DEPTH_STENCIL 0x200 +#define FO_NEW_SAMPLER 0x400 +#define FO_NEW_TEXTURE 0x800 +#define FO_NEW_VERTEX 0x2000 +#define FO_NEW_VERTEX_SHADER 0x4000 +#define FO_NEW_BLEND_COLOR 0x8000 +#define FO_NEW_CLEAR_COLOR 0x10000 +#define FO_NEW_VERTEX_BUFFER 0x20000 +#define FO_NEW_VERTEX_ELEMENT 0x40000 + + + +#define FO_HW 0 +#define FO_SW 1 + +struct fo_state { + void *sw_state; + void *hw_state; +}; +struct failover_context { + struct pipe_context pipe; /**< base class */ + + + /* The most recent drawing state as set by the driver: + */ + const struct fo_state *blend; + const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *depth_stencil; + const struct fo_state *rasterizer; + const struct fo_state *fragment_shader; + const struct fo_state *vertex_shader; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; + + uint num_vertex_buffers; + uint num_vertex_elements; + + void *sw_sampler_state[PIPE_MAX_SAMPLERS]; + void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + + unsigned mode; + struct pipe_context *hw; + struct pipe_context *sw; +}; + + + +void failover_init_state_functions( struct failover_context *failover ); +void failover_state_emit( struct failover_context *failover ); + +static INLINE struct failover_context * +failover_context( struct pipe_context *pipe ) +{ + return (struct failover_context *)pipe; +} + + +#endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c new file mode 100644 index 0000000000..6a79706632 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state.c @@ -0,0 +1,483 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_inlines.h" + +#include "fo_context.h" + + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + + +static void * +failover_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *blend ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_blend_state(failover->sw, blend); + state->hw_state = failover->hw->create_blend_state(failover->hw, blend); + + return state; +} + +static void +failover_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)blend; + failover->blend = state; + failover->dirty |= FO_NEW_BLEND; + failover->sw->bind_blend_state( failover->sw, state->sw_state ); + failover->hw->bind_blend_state( failover->hw, state->hw_state ); +} + +static void +failover_delete_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct fo_state *state = (struct fo_state*)blend; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_blend_state(failover->sw, state->sw_state); + failover->hw->delete_blend_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->blend_color = *blend_color; + failover->dirty |= FO_NEW_BLEND_COLOR; + failover->sw->set_blend_color( failover->sw, blend_color ); + failover->hw->set_blend_color( failover->hw, blend_color ); +} + +static void +failover_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->clip = *clip; + failover->dirty |= FO_NEW_CLIP; + failover->sw->set_clip_state( failover->sw, clip ); + failover->hw->set_clip_state( failover->hw, clip ); +} + + +static void * +failover_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); + 
state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)depth_stencil; + failover->depth_stencil = state; + failover->dirty |= FO_NEW_DEPTH_STENCIL; + failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state); +} + +static void +failover_delete_depth_stencil_state(struct pipe_context *pipe, + void *ds) +{ + struct fo_state *state = (struct fo_state*)ds; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *framebuffer) +{ + struct failover_context *failover = failover_context(pipe); + + failover->framebuffer = *framebuffer; + failover->dirty |= FO_NEW_FRAMEBUFFER; + failover->sw->set_framebuffer_state( failover->sw, framebuffer ); + failover->hw->set_framebuffer_state( failover->hw, framebuffer ); +} + + +static void * +failover_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_fs_state(failover->sw, templ); + state->hw_state = failover->hw->create_fs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)fs; + failover->fragment_shader = state; + failover->dirty |= FO_NEW_FRAGMENT_SHADER; + failover->sw->bind_fs_state(failover->sw, state->sw_state); + failover->hw->bind_fs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_fs_state(struct pipe_context *pipe, + void *fs) +{ + struct fo_state *state = (struct fo_state*)fs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_fs_state(failover->sw, state->sw_state); + failover->hw->delete_fs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void * +failover_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_vs_state(failover->sw, templ); + state->hw_state = failover->hw->create_vs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)vs; + failover->vertex_shader = state; + failover->dirty |= FO_NEW_VERTEX_SHADER; + failover->sw->bind_vs_state(failover->sw, state->sw_state); + failover->hw->bind_vs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct fo_state *state = (struct fo_state*)vs; + struct 
failover_context *failover = failover_context(pipe); + + failover->sw->delete_vs_state(failover->sw, state->sw_state); + failover->hw->delete_vs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->poly_stipple = *stipple; + failover->dirty |= FO_NEW_STIPPLE; + failover->sw->set_polygon_stipple( failover->sw, stipple ); + failover->hw->set_polygon_stipple( failover->hw, stipple ); +} + + +static void * +failover_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); + state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)raster; + failover->rasterizer = state; + failover->dirty |= FO_NEW_RASTERIZER; + failover->sw->bind_rasterizer_state(failover->sw, state->sw_state); + failover->hw->bind_rasterizer_state(failover->hw, state->hw_state); +} + +static void +failover_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct fo_state *state = (struct fo_state*)raster; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_rasterizer_state(failover->sw, state->sw_state); + failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->scissor = *scissor; + failover->dirty |= FO_NEW_SCISSOR; + failover->sw->set_scissor_state( failover->sw, scissor ); + failover->hw->set_scissor_state( failover->hw, scissor ); +} + + +static void * +failover_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); + state->hw_state = failover->hw->create_sampler_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)sampler; + uint i; + assert(num <= PIPE_MAX_SAMPLERS); + /* Check for no-op */ + if (num == failover->num_samplers && + !memcmp(failover->sampler, sampler, num * sizeof(void *))) + return; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL; + failover->hw_sampler_state[i] = i < num ? 
state[i].hw_state : NULL; + } + failover->dirty |= FO_NEW_SAMPLER; + failover->num_samplers = num; + failover->sw->bind_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_sampler_states(failover->hw, num, + failover->hw_sampler_state); +} + +static void +failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ + struct fo_state *state = (struct fo_state*)sampler; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_sampler_state(failover->sw, state->sw_state); + failover->hw->delete_sampler_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct failover_context *failover = failover_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == failover->num_textures && + !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *))) + return; + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + texture[i]); + for (i = num; i < failover->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + NULL); + failover->dirty |= FO_NEW_TEXTURE; + failover->num_textures = num; + failover->sw->set_sampler_textures( failover->sw, num, texture ); + failover->hw->set_sampler_textures( failover->hw, num, texture ); +} + + +static void +failover_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->viewport = *viewport; + failover->dirty |= FO_NEW_VIEWPORT; + failover->sw->set_viewport_state( failover->sw, viewport ); + failover->hw->set_viewport_state( failover->hw, viewport ); +} + + +static void +failover_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vertex_buffers) +{ + struct failover_context *failover = failover_context(pipe); + + memcpy(failover->vertex_buffers, vertex_buffers, + count * sizeof(vertex_buffers[0])); + failover->dirty |= FO_NEW_VERTEX_BUFFER; + failover->num_vertex_buffers = count; + failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); + failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); +} + + +static void +failover_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *vertex_elements) +{ + struct failover_context *failover = failover_context(pipe); + + memcpy(failover->vertex_elements, vertex_elements, + count * sizeof(vertex_elements[0])); + + failover->dirty |= FO_NEW_VERTEX_ELEMENT; + failover->num_vertex_elements = count; + failover->sw->set_vertex_elements( failover->sw, count, vertex_elements ); + failover->hw->set_vertex_elements( failover->hw, count, vertex_elements ); +} + +void +failover_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct failover_context *failover = failover_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + failover->sw->set_constant_buffer(failover->sw, shader, index, buf); + failover->hw->set_constant_buffer(failover->hw, shader, index, buf); +} + + +void +failover_init_state_functions( struct failover_context *failover ) +{ + failover->pipe.create_blend_state = failover_create_blend_state; + 
failover->pipe.bind_blend_state = failover_bind_blend_state; + failover->pipe.delete_blend_state = failover_delete_blend_state; + failover->pipe.create_sampler_state = failover_create_sampler_state; + failover->pipe.bind_sampler_states = failover_bind_sampler_states; + failover->pipe.delete_sampler_state = failover_delete_sampler_state; + failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; + failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; + failover->pipe.delete_depth_stencil_alpha_state = failover_delete_depth_stencil_state; + failover->pipe.create_rasterizer_state = failover_create_rasterizer_state; + failover->pipe.bind_rasterizer_state = failover_bind_rasterizer_state; + failover->pipe.delete_rasterizer_state = failover_delete_rasterizer_state; + failover->pipe.create_fs_state = failover_create_fs_state; + failover->pipe.bind_fs_state = failover_bind_fs_state; + failover->pipe.delete_fs_state = failover_delete_fs_state; + failover->pipe.create_vs_state = failover_create_vs_state; + failover->pipe.bind_vs_state = failover_bind_vs_state; + failover->pipe.delete_vs_state = failover_delete_vs_state; + + failover->pipe.set_blend_color = failover_set_blend_color; + failover->pipe.set_clip_state = failover_set_clip_state; + failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; + failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; + failover->pipe.set_scissor_state = failover_set_scissor_state; + failover->pipe.set_sampler_textures = failover_set_sampler_textures; + failover->pipe.set_viewport_state = failover_set_viewport_state; + failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_vertex_elements = failover_set_vertex_elements; + failover->pipe.set_constant_buffer = failover_set_constant_buffer; +} diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c new file mode 100644 index 0000000000..bd4fce9d20 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "fo_context.h" + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + +/* Bring the software pipe uptodate with current state. + * + * With constant state objects we would probably just send all state + * to both rasterizers all the time??? + */ +void +failover_state_emit( struct failover_context *failover ) +{ + if (failover->dirty & FO_NEW_BLEND) + failover->sw->bind_blend_state( failover->sw, + failover->blend->sw_state ); + + if (failover->dirty & FO_NEW_BLEND_COLOR) + failover->sw->set_blend_color( failover->sw, &failover->blend_color ); + + if (failover->dirty & FO_NEW_CLIP) + failover->sw->set_clip_state( failover->sw, &failover->clip ); + + if (failover->dirty & FO_NEW_DEPTH_STENCIL) + failover->sw->bind_depth_stencil_alpha_state( failover->sw, + failover->depth_stencil->sw_state ); + + if (failover->dirty & FO_NEW_FRAMEBUFFER) + failover->sw->set_framebuffer_state( failover->sw, &failover->framebuffer ); + + if (failover->dirty & FO_NEW_FRAGMENT_SHADER) + failover->sw->bind_fs_state( failover->sw, + failover->fragment_shader->sw_state ); + + if (failover->dirty & FO_NEW_VERTEX_SHADER) + failover->sw->bind_vs_state( failover->sw, + failover->vertex_shader->sw_state ); + + if (failover->dirty & FO_NEW_STIPPLE) + failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple ); + + if (failover->dirty & FO_NEW_RASTERIZER) + failover->sw->bind_rasterizer_state( failover->sw, + failover->rasterizer->sw_state ); + + if (failover->dirty & FO_NEW_SCISSOR) + failover->sw->set_scissor_state( failover->sw, &failover->scissor ); + + if (failover->dirty & FO_NEW_VIEWPORT) + failover->sw->set_viewport_state( failover->sw, &failover->viewport ); + + if (failover->dirty & FO_NEW_SAMPLER) { + failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); + } + + if (failover->dirty & FO_NEW_TEXTURE) { + failover->sw->set_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); + } + + if (failover->dirty & FO_NEW_VERTEX_BUFFER) { + failover->sw->set_vertex_buffers( failover->sw, + failover->num_vertex_buffers, + failover->vertex_buffers ); + } + + if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { + failover->sw->set_vertex_elements( failover->sw, + failover->num_vertex_elements, + failover->vertex_elements ); + } + + failover->dirty = 0; +} diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h new file mode 100644 index 0000000000..a8ce997a1f --- /dev/null +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef FO_WINSYS_H
+#define FO_WINSYS_H
+
+
+/* This is the interface that failover requires any window system
+ * hosting it to implement.  This is the only include file in failover
+ * which is public.
+ */
+
+
+struct pipe_context;
+
+
+struct pipe_context *failover_create( struct pipe_context *hw,
+                                      struct pipe_context *sw );
+
+
+#endif /* FO_WINSYS_H */
diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile
new file mode 100644
index 0000000000..41a61a0020
--- /dev/null
+++ b/src/gallium/drivers/i915simple/Makefile
@@ -0,0 +1,30 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i915simple
+
+C_SOURCES = \
+	i915_blit.c \
+	i915_clear.c \
+	i915_flush.c \
+	i915_context.c \
+	i915_debug.c \
+	i915_debug_fp.c \
+	i915_state.c \
+	i915_state_immediate.c \
+	i915_state_dynamic.c \
+	i915_state_derived.c \
+	i915_state_emit.c \
+	i915_state_sampler.c \
+	i915_screen.c \
+	i915_prim_emit.c \
+	i915_prim_vbuf.c \
+	i915_texture.c \
+	i915_fpc_emit.c \
+	i915_fpc_translate.c \
+	i915_surface.c
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript
new file mode 100644
index 0000000000..2366e1247f
--- /dev/null
+++ b/src/gallium/drivers/i915simple/SConscript
@@ -0,0 +1,29 @@
+Import('*')
+
+env = env.Clone()
+
+i915simple = env.ConvenienceLibrary(
+	target = 'i915simple',
+	source = [
+		'i915_blit.c',
+		'i915_clear.c',
+		'i915_context.c',
+		'i915_debug.c',
+		'i915_debug_fp.c',
+		'i915_flush.c',
+		'i915_fpc_emit.c',
+		'i915_fpc_translate.c',
+		'i915_prim_emit.c',
+		'i915_prim_vbuf.c',
+		'i915_screen.c',
+		'i915_state.c',
+		'i915_state_derived.c',
+		'i915_state_dynamic.c',
+		'i915_state_emit.c',
+		'i915_state_immediate.c',
+		'i915_state_sampler.c',
+		'i915_surface.c',
+		'i915_texture.c',
+	])
+
+Export('i915simple')
diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h
new file mode 100644
index 0000000000..45bf4f4028
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_batch.h
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef I915_BATCH_H
+#define I915_BATCH_H
+
+#include "i915_winsys.h"
+
+struct i915_batchbuffer
+{
+   struct pipe_buffer *buffer;
+   struct i915_winsys *winsys;
+
+   unsigned char *map;
+   unsigned char *ptr;
+
+   size_t size;
+   size_t actual_size;
+
+   size_t relocs;
+   size_t max_relocs;
+};
+
+static INLINE boolean
+i915_batchbuffer_check( struct i915_batchbuffer *batch,
+			size_t dwords,
+			size_t relocs )
+{
+   /** TODO JB: Check relocs */
+   return dwords * 4 <= batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE size_t
+i915_batchbuffer_space( struct i915_batchbuffer *batch )
+{
+   return batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE void
+i915_batchbuffer_dword( struct i915_batchbuffer *batch,
+			unsigned dword )
+{
+   if (i915_batchbuffer_space(batch) < 4)
+      return;
+
+   *(unsigned *)batch->ptr = dword;
+   batch->ptr += 4;
+}
+
+static INLINE void
+i915_batchbuffer_write( struct i915_batchbuffer *batch,
+			void *data,
+			size_t size )
+{
+   if (i915_batchbuffer_space(batch) < size)
+      return;
+
+   memcpy(batch->ptr, data, size);
+   batch->ptr += size;
+}
+
+static INLINE void
+i915_batchbuffer_reloc( struct i915_batchbuffer *batch,
+			struct pipe_buffer *buffer,
+			size_t flags,
+			size_t offset )
+{
+   batch->winsys->batch_reloc( batch->winsys, buffer, flags, offset );
+}
+
+static INLINE void
+i915_batchbuffer_flush( struct i915_batchbuffer *batch,
+			struct pipe_fence_handle **fence )
+{
+   batch->winsys->batch_flush( batch->winsys, fence );
+}
+
+#define BEGIN_BATCH( dwords, relocs ) \
+   (i915_batchbuffer_check( i915->batch, dwords, relocs ))
+
+#define OUT_BATCH( dword ) \
+   i915_batchbuffer_dword( i915->batch, dword )
+
+#define OUT_RELOC( buf, flags, delta ) \
+   i915_batchbuffer_reloc( i915->batch, buf, flags, delta )
+
+#define FLUSH_BATCH(fence) do {				\
+   i915->winsys->batch_flush( i915->winsys, fence );	\
+   i915->hardware_dirty = ~0;				\
+} while (0)
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c
new file mode 100644
index 0000000000..45fae4c999
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_blit.c
@@ -0,0 +1,157 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_blit.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_debug.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void +i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25); + CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + break; + default: + return; + } + +// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", +// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + + + if (!BEGIN_BATCH(6, 1)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(6, 1)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH(color); +} + + +void +i915_copy_blit( struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h ) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + + + I915_DBG(i915, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + src_pitch *= (short) cpp; + dst_pitch *= (short) cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = + (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = + (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + break; + default: + return; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + /* Hardware can handle negative pitches but loses the ability to do + * proper overlapping blits 
in that case. We don't really have a + * need for either at this stage. + */ + assert (dst_pitch > 0 && src_pitch > 0); + + + if (!BEGIN_BATCH(8, 2)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(8, 2)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(((int) src_pitch & 0xffff)); + OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); +} + + diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h new file mode 100644 index 0000000000..6e5b44e124 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BLIT_H +#define I915_BLIT_H + +#include "i915_context.h" + +extern void i915_copy_blit(struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short srcx, short srcy, + short dstx, short dsty, + short w, short h ); + +extern void i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, unsigned color); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c new file mode 100644 index 0000000000..8a2d3ca43f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_state.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_DEFINED; +} diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c new file mode 100644 index 0000000000..6dd3eda85d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -0,0 +1,193 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_state.h" +#include "i915_batch.h" +#include "i915_texture.h" +#include "i915_reg.h" + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +static void i915_destroy( struct pipe_context *pipe ) +{ + struct i915_context *i915 = i915_context( pipe ); + + draw_destroy( i915->draw ); + + if(i915->winsys->destroy) + i915->winsys->destroy(i915->winsys); + + FREE( i915 ); +} + + +static boolean +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context( pipe ); + struct draw_context *draw = i915->draw; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + /* + * Map vertex buffers + */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + void *buf + = pipe_buffer_map(pipe->screen, + i915->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + draw_set_mapped_constant_buffer(draw, + i915->current.constants[PIPE_SHADER_VERTEX], + ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float) )); + + /* draw! */ + draw_arrays(i915->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); + } + + return TRUE; +} + +static boolean +i915_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count ); +} + +static boolean i915_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *i915_create_context( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct i915_winsys *i915_winsys ) +{ + struct i915_context *i915; + + i915 = CALLOC_STRUCT(i915_context); + if (i915 == NULL) + return NULL; + + i915->winsys = i915_winsys; + i915->pipe.winsys = pipe_winsys; + i915->pipe.screen = screen; + + i915->pipe.destroy = i915_destroy; + + i915->pipe.clear = i915_clear; + + + i915->pipe.draw_arrays = i915_draw_arrays; + i915->pipe.draw_elements = i915_draw_elements; + i915->pipe.draw_range_elements = i915_draw_range_elements; + + /* + * Create drawing context and plug our rendering stage into it. 
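+    * The rasterize stage is either the vertex-buffer (vbuf) stage or, when
+    * the I915_NO_VBUF debug option is set, the simpler per-primitive render
+    * stage; the aaline/aapoint stages installed below provide fallbacks for
+    * antialiased lines and points.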
+ */ + i915->draw = draw_create(); + assert(i915->draw); + if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); + } + else { + draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); + } + + i915_init_surface_functions(i915); + i915_init_state_functions(i915); + i915_init_flush_functions(i915); + i915_init_texture_functions(i915); + + draw_install_aaline_stage(i915->draw, &i915->pipe); + draw_install_aapoint_stage(i915->draw, &i915->pipe); + + i915->dirty = ~0; + i915->hardware_dirty = ~0; + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch = i915_winsys->batch_get(i915_winsys); + i915->batch->winsys = i915_winsys; + + return &i915->pipe; +} + diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h new file mode 100644 index 0000000000..3cdabe45f9 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -0,0 +1,345 @@ + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/tgsi_scan.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. 
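+ *
+ * One copy of this lives in i915_context::current; the I915_HW_* dirty
+ * bits further down track which groups of it still need to be re-emitted
+ * by i915_emit_hardware_state().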
+ */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; + unsigned minlod; + unsigned maxlod; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned stride; + unsigned depth_stride; /* per-image on i945? */ + unsigned total_nblocksy; + + unsigned tiled; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_batchbuffer; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + struct i915_batchbuffer *batch; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + size_t vbo_offset; + unsigned vbo_flushed; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 
0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c: + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c: + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c: + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c: + */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c: + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + + +/*********************************************************************** + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ + return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c new file mode 100644 index 0000000000..7a4e7051d2 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -0,0 +1,900 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_debug.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); +} + + +static boolean debug( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned i; + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + +static const char *get_prim_name( unsigned val ) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; break; + case PRIM3D_TRISTRIP: return "TRISTRIP"; break; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; + case PRIM3D_TRIFAN: return "TRIFAN"; break; + case PRIM3D_POLY: return "POLY"; break; + case PRIM3D_LINELIST: return "LINELIST"; break; + case PRIM3D_LINESTRIP: return "LINESTRIP"; break; + case PRIM3D_RECTLIST: return "RECTLIST"; break; + case PRIM3D_POINTLIST: return "POINTLIST"; break; + case PRIM3D_DIB: return "DIB"; break; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; + default: return "????"; break; + } +} + +static boolean debug_prim( struct debug_stream *stream, const char *name, + boolean dump_floats, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i; + + + + PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len); + PRINTF(stream, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len; i++) { + if (dump_floats) + PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); + else + PRINTF(stream, "\t0x%08x\n", ptr[i]); + } + + + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + + +static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + PRINTF(stream, "%s (%d dwords):\n", name, len); + i915_disassemble_program( stream, ptr, len ); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned old_offset = stream->offset + len * sizeof(unsigned); + unsigned i; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + 
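+   /* Decoding continues at the chained batch address taken from the
+    * second dword; report whether that jump goes backwards or forwards.
+    */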
if (stream->offset < old_offset) + PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + PRINTF(stream, "\n... skipping from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + + + return TRUE; +} + + +static boolean debug_variable_length_prim( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i, len; + + ushort *idx = (ushort *)(ptr+1); + for (i = 0; idx[i] != 0xffff; i++) + ; + + len = 1+(i+2)/2; + + PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static void +BITS( + struct debug_stream *stream, + unsigned dw, + unsigned hi, + unsigned lo, + const char *fmt, + ... ) +{ + va_list args; + unsigned himask = ~0UL >> (31 - (hi)); + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + + PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); +} + +#ifdef DEBUG +#define MBZ( dw, hi, lo) do { \ + unsigned x = (dw) >> (lo); \ + unsigned lomask = (1 << (lo)) - 1; \ + unsigned himask; \ + himask = (1UL << (hi)) - 1; \ + assert ((x & himask & ~lomask) == 0); \ +} while (0) +#else +#define MBZ( dw, hi, lo) do { \ +} while (0) +#endif + +static void +FLAG( + struct debug_stream *stream, + unsigned dw, + unsigned bit, + const char *fmt, + ... ) +{ + if (((dw) >> (bit)) & 1) { + va_list args; + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + + PRINTF(stream, "\n"); + } +} + +static boolean debug_load_immediate( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 4) & 0xff; + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 29, 24, "vb dword width"); + BITS(stream, ptr[j], 21, 16, "vb dword pitch"); + BITS(stream, ptr[j], 15, 0, "vb max index"); + j++; + } + if (bits & (1<<2)) { + int i; + PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS(stream, tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 23, "point width"); + BITS(stream, ptr[j], 22, 19, "line width"); + FLAG(stream, ptr[j], 18, "alpha flatshade"); + FLAG(stream, ptr[j], 17, "fog flatshade"); + FLAG(stream, ptr[j], 16, "spec flatshade"); + FLAG(stream, ptr[j], 15, "rgb flatshade"); + BITS(stream, ptr[j], 14, 13, "cull mode"); + FLAG(stream, ptr[j], 12, "vfmt: point width"); + FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); + FLAG(stream, ptr[j], 10, "vfmt: rgba"); + FLAG(stream, ptr[j], 9, "vfmt: depth offset"); + BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG(stream, ptr[j], 5, "force dflt diffuse"); + FLAG(stream, ptr[j], 4, "force dflt 
specular"); + FLAG(stream, ptr[j], 3, "local depth offset enable"); + FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG(stream, ptr[j], 1, "sprite point"); + FLAG(stream, ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 28, "rgba write disables"); + FLAG(stream, ptr[j], 27, "force dflt point width"); + FLAG(stream, ptr[j], 26, "last pixel enable"); + FLAG(stream, ptr[j], 25, "global z offset enable"); + FLAG(stream, ptr[j], 24, "fog enable"); + BITS(stream, ptr[j], 23, 16, "stencil ref"); + BITS(stream, ptr[j], 15, 13, "stencil test"); + BITS(stream, ptr[j], 12, 10, "stencil fail op"); + BITS(stream, ptr[j], 9, 7, "stencil pass z fail op"); + BITS(stream, ptr[j], 6, 4, "stencil pass z pass op"); + FLAG(stream, ptr[j], 3, "stencil write enable"); + FLAG(stream, ptr[j], 2, "stencil test enable"); + FLAG(stream, ptr[j], 1, "color dither enable"); + FLAG(stream, ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "alpha test enable"); + BITS(stream, ptr[j], 30, 28, "alpha func"); + BITS(stream, ptr[j], 27, 20, "alpha ref"); + FLAG(stream, ptr[j], 19, "depth test enable"); + BITS(stream, ptr[j], 18, 16, "depth func"); + FLAG(stream, ptr[j], 15, "blend enable"); + BITS(stream, ptr[j], 14, 12, "blend func"); + BITS(stream, ptr[j], 11, 8, "blend src factor"); + BITS(stream, ptr[j], 7, 4, "blend dst factor"); + FLAG(stream, ptr[j], 3, "depth write enable"); + FLAG(stream, ptr[j], 2, "color write enable"); + BITS(stream, ptr[j], 1, 0, "provoking vertex"); + j++; + } + + + PRINTF(stream, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + +static boolean debug_load_indirect( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 8) & 0x3f; + unsigned i, j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + for (i = 0; i < 6; i++) { + if (bits & (1<<i)) { + switch (1<<(8+i)) { + case LI0_STATE_STATIC_INDIRECT: + PRINTF(stream, " STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_DYNAMIC_INDIRECT: + PRINTF(stream, " DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + break; + case LI0_STATE_SAMPLER: + PRINTF(stream, " SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_MAP: + PRINTF(stream, " MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_PROGRAM: + PRINTF(stream, " PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + case LI0_STATE_CONSTANTS: + PRINTF(stream, " CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; + PRINTF(stream, " 0x%08x\n", ptr[j++]); + break; + default: + assert(0); + break; + } + } + } + + if (bits == 0) { + PRINTF(stream, "\t DUMMY: 0x%08x\n", ptr[j++]); + } + + PRINTF(stream, "\n"); + + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + +static void BR13( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + FLAG(stream, val, 30, "clipping enable"); + BITS(stream, val, 25, 24, "color depth (3==32bpp)"); + BITS(stream, val, 23, 16, "raster op"); + BITS(stream, val, 15, 0, "dest 
pitch"); +} + + +static void BR22( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y1"); + BITS(stream, val, 15, 0, "dest x1"); +} + +static void BR23( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "dest y2"); + BITS(stream, val, 15, 0, "dest x2"); +} + +static void BR09( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- dest address\n", val); +} + +static void BR26( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "src y1"); + BITS(stream, val, 15, 0, "src x1"); +} + +static void BR11( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 15, 0, "src pitch"); +} + +static void BR12( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- src address\n", val); +} + +static void BR16( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- color\n", val); +} + +static boolean debug_copy_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_color_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_modes4( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 21, 18, "logicop func"); + FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); + FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); + BITS(stream, ptr[j], 15, 8, "stencil test mask"); + BITS(stream, ptr[j], 7, 0, "stencil write mask"); + j++; + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_map_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG(stream, ptr[j], 1, "vertical line stride"); + FLAG(stream, ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "height"); + BITS(stream, ptr[j], 20, 10, "width"); + 
BITS(stream, ptr[j], 9, 7, "surface format"); + BITS(stream, ptr[j], 6, 3, "texel format"); + FLAG(stream, ptr[j], 2, "use fence regs"); + FLAG(stream, ptr[j], 1, "tiled surface"); + FLAG(stream, ptr[j], 0, "tile walk ymajor"); + j++; + } + { + PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "dword pitch"); + BITS(stream, ptr[j], 20, 15, "cube face enables"); + BITS(stream, ptr[j], 14, 9, "max lod"); + FLAG(stream, ptr[j], 8, "mip layout right"); + BITS(stream, ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_sampler_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "reverse gamma"); + FLAG(stream, ptr[j], 30, "planar to packed"); + FLAG(stream, ptr[j], 29, "yuv->rgb"); + BITS(stream, ptr[j], 28, 27, "chromakey index"); + BITS(stream, ptr[j], 26, 22, "base mip level"); + BITS(stream, ptr[j], 21, 20, "mip mode filter"); + BITS(stream, ptr[j], 19, 17, "mag mode filter"); + BITS(stream, ptr[j], 16, 14, "min mode filter"); + BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG(stream, ptr[j], 4, "shadow enable"); + FLAG(stream, ptr[j], 3, "max-aniso-4"); + BITS(stream, ptr[j], 2, 0, "shadow func"); + j++; + } + + { + PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG(stream, ptr[j], 17, "kill pixel enable"); + FLAG(stream, ptr[j], 16, "keyed tex filter mode"); + FLAG(stream, ptr[j], 15, "chromakey enable"); + BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); + BITS(stream, ptr[j], 11, 9, "tcy wrap mode"); + BITS(stream, ptr[j], 8, 6, "tcz wrap mode"); + FLAG(stream, ptr[j], 5, "normalized coords"); + BITS(stream, ptr[j], 4, 1, "map (surface) index"); + FLAG(stream, ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_dest_vars( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "early classic ztest"); + FLAG(stream, ptr[j], 30, "opengl tex default color"); + FLAG(stream, ptr[j], 29, "bypass iz"); + FLAG(stream, ptr[j], 28, "lod preclamp"); + BITS(stream, ptr[j], 27, 26, "dither pattern"); + FLAG(stream, ptr[j], 25, "linear gamma blend"); + FLAG(stream, ptr[j], 24, "debug dither"); + BITS(stream, ptr[j], 23, 20, "dstorg x"); + BITS(stream, ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS(stream, ptr[j], 14, 12, "422 write select"); + BITS(stream, ptr[j], 11, 8, "cbuf format"); + BITS(stream, ptr[j], 3, 2, "zbuf format"); + FLAG(stream, ptr[j], 1, "vert line stride"); + FLAG(stream, ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_buf_info( struct 
debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 28, 28, "aux buffer id"); + BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG(stream, ptr[j], 23, "use fence regs"); + FLAG(stream, ptr[j], 22, "tiled surface"); + FLAG(stream, ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS(stream, ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean i915_debug_packet( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug(stream, "MI_NOOP", 1); + case 0x3: + return debug(stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug(stream, "MI_FLUSH", 1); + case 0xA: + debug(stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug(stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + (void)debug(stream, "UNKNOWN 0x0 case!", 1); + assert(0); + break; + } + break; + case 0x1: + (void) debug(stream, "UNKNOWN 0x1 case!", 1); + assert(0); + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug(stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug(stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); + case 0x9: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug(stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug(stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + (void) debug(stream, "UNKNOWN 0x1c case!", 1); + assert(0); + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug(stream, 
"3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + assert(0); + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug(stream, "???", (cmd & 0xffff) + 1); + else + return debug(stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) + return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + else if (cmd & (1 << 17)) + { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim(stream); + else + return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } + else + return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); + break; + default: + return debug(stream, "", 0); + } + default: + assert(0); + return 0; + } + + assert(0); + return 0; +} + + + +void +i915_dump_batchbuffer( struct i915_context *i915 ) +{ + struct debug_stream stream; + /* TODO fix me */ + unsigned *start = 0;/*i915->batch_start;*/ + unsigned *end = 0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/ + unsigned long bytes = (unsigned long) (end - start) * 4; + boolean done = FALSE; + + stream.offset = 0; + stream.ptr = (char *)start; + stream.print_addresses = 0; + stream.winsys = i915->pipe.winsys; + + if (!start || !end) { + debug_printf( "\n\nBATCH: ???\n"); + return; + } + + debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); + + while (!done && + stream.offset < bytes) + { + if (!i915_debug_packet( &stream )) + break; + + assert(stream.offset <= bytes && + stream.offset >= 0); + } + + debug_printf( "END-BATCH\n\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h new file mode 100644 index 0000000000..afb63edabf --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_DEBUG_H +#define I915_DEBUG_H + +#include <stdarg.h> + +struct i915_context; + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; + struct pipe_winsys *winsys; +}; + + +/* Internal functions + */ +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +void i915_print_ureg(const char *msg, unsigned ureg); + + +#define DEBUG_BATCH 0x1 +#define DEBUG_BLIT 0x2 +#define DEBUG_BUFFER 0x4 +#define DEBUG_CONSTANTS 0x8 +#define DEBUG_CONTEXT 0x10 +#define DEBUG_DRAW 0x20 +#define DEBUG_DYNAMIC 0x40 +#define DEBUG_FLUSH 0x80 +#define DEBUG_MAP 0x100 +#define DEBUG_PROGRAM 0x200 +#define DEBUG_REGIONS 0x400 +#define DEBUG_SAMPLER 0x800 +#define DEBUG_STATIC 0x1000 +#define DEBUG_SURFACE 0x2000 +#define DEBUG_WINSYS 0x4000 + +#include "pipe/p_compiler.h" + +#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) + +#include "pipe/p_winsys.h" + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + if ((i915)->debug & FILE_DEBUG_FLAG) { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + +#else + +static INLINE void +I915_DBG( + struct i915_context *i915, + const char *fmt, + ... ) +{ + (void) i915; + (void) fmt; +} + +#endif + + +void i915_dump_batchbuffer( struct i915_context *i915 ); + + + +void i915_debug_init( struct i915_context *i915 ); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c new file mode 100644 index 0000000000..48be3e1472 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -0,0 +1,363 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" +#include "util/u_memory.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); +} + + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + PRINTF(stream, "T_DIFFUSE"); + return; + case T_SPECULAR: + PRINTF(stream, "T_SPECULAR"); + return; + case T_FOG_W: + PRINTF(stream, "T_FOG_W"); + return; + default: + PRINTF(stream, "T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + PRINTF(stream, "oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + PRINTF(stream, "oD"); + return; + } + break; + default: + break; + } + + PRINTF(stream, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + PRINTF(stream, "."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + PRINTF(stream, "-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + PRINTF(stream, "x"); + break; + case 1: + PRINTF(stream, "y"); + break; + case 2: + PRINTF(stream, "z"); + break; + case 3: + PRINTF(stream, "w"); + break; + case 4: + PRINTF(stream, "0"); + break; + case 5: + PRINTF(stream, "1"); + break; + default: + PRINTF(stream, "?"); + break; + } + } +} + + +static void +print_src_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + print_reg_neg_swizzle(stream, dword); +} + + +static void +print_dest_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + unsigned type = 
(dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + PRINTF(stream, "."); + if (dword & A0_DEST_CHANNEL_X) + PRINTF(stream, "x"); + if (dword & A0_DEST_CHANNEL_Y) + PRINTF(stream, "y"); + if (dword & A0_DEST_CHANNEL_Z) + PRINTF(stream, "z"); + if (dword & A0_DEST_CHANNEL_W) + PRINTF(stream, "w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + if (opcode != A0_NOP) { + print_dest_reg(stream, program[0]); + if (program[0] & A0_DEST_SATURATE) + PRINTF(stream, " = SATURATE "); + else + PRINTF(stream, " = "); + } + + PRINTF(stream, "%s ", opcodes[opcode]); + + print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC2_REG(program[2])); + PRINTF(stream, "\n"); + return; +} + + +static void +print_tex_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, " = "); + + PRINTF(stream, "%s ", opcodes[opcode]); + + PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_texkil_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "TEXKIL "); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_dcl_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "%s ", opcodes[opcode]); + print_dest_reg(stream, + program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, "\n"); +} + + +void +i915_disassemble_program(struct debug_stream *stream, + const unsigned * program, unsigned sz) +{ + unsigned i; + + PRINTF(stream, "\t\tBEGIN\n"); + + assert((program[0] & 0x1ff) + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + unsigned opcode = program[0] & (0x1f << 24); + + PRINTF(stream, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= A0_SLT) + print_arith_op(stream, opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) + print_tex_op(stream, opcode >> 24, program); + else if (opcode == T0_TEXKILL) + print_texkil_op(stream, opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(stream, opcode >> 24, program); + else + PRINTF(stream, "Unknown opcode 0x%x\n", opcode); + } + + PRINTF(stream, "\t\tEND\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c new file mode 100644 index 0000000000..472e0ab774 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" + + +static void i915_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct i915_context *i915 = i915_context(pipe); + + draw_flush(i915->draw); + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + unsigned flush = MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush |= INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush |= FLUSH_MAP_CACHE; + + if (!BEGIN_BATCH(1, 0)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(1, 0)); + } + OUT_BATCH( flush ); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH(fence); + i915->vbo_flushed = 1; +} + + + +void i915_init_flush_functions( struct i915_context *i915 ) +{ + i915->pipe.flush = i915_flush; +} diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h new file mode 100644 index 0000000000..2f0f99d046 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 + + + +/** + * Program translation state + */ +struct i915_fp_compile { + struct i915_fragment_shader *shader; /* the shader we're compiling */ + + boolean used_constants[I915_MAX_CONSTANT]; + + /** maps TGSI immediate index to constant slot */ + uint num_immediates; + uint immediates_map[I915_MAX_CONSTANT]; + float immediates[I915_MAX_CONSTANT][4]; + + boolean first_instruction; + + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; + + uint *csr; /**< Cursor, points into program. */ + + uint *decl; /**< Cursor, points into declarations. */ + + uint decl_s; /**< flags for which s regs need to be decl'd */ + uint decl_t; /**< flags for which t regs need to be decl'd */ + + uint temp_flag; /**< Tracks temporary regs which are in use */ + uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + + uint nr_tex_indirect; + uint nr_tex_insn; + uint nr_alu_insn; + uint nr_decl_insn; + + boolean error; /**< Set if i915_program_error() is called */ + uint wpos_tex; + uint NumNativeInstructions; + uint NumNativeAluInstructions; + uint NumNativeTexInstructions; + uint NumNativeTexIndirections; +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. 
+ */ +#define UREG_TYPE_SHIFT 29 +#define UREG_NR_SHIFT 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT 23 +#define UREG_CHANNEL_X_SHIFT 20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 +#define UREG_CHANNEL_Y_SHIFT 16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 +#define UREG_CHANNEL_Z_SHIFT 12 +#define UREG_CHANNEL_W_NEGATE_SHIFT 11 +#define UREG_CHANNEL_W_SHIFT 8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 +#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 +#define UREG_CHANNEL_ONE_SHIFT 0 + +#define UREG_BAD 0xffffffff /* not a valid ureg */ + +#define X SRC_X +#define Y SRC_Y +#define Z SRC_Z +#define W SRC_W +#define ZERO SRC_ZERO +#define ONE SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ + ((nr) << UREG_NR_SHIFT) | \ + (X << UREG_CHANNEL_X_SHIFT) | \ + (Y << UREG_CHANNEL_Y_SHIFT) | \ + (Z << UREG_CHANNEL_Z_SHIFT) | \ + (W << UREG_CHANNEL_W_SHIFT) | \ + (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ + (ONE << UREG_CHANNEL_ONE_SHIFT)) + +#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20)) +#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK) +#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG representation: + */ +static INLINE int +swizzle(int reg, uint x, uint y, uint z, uint w) +{ + assert(x <= SRC_ONE); + assert(y <= SRC_ONE); + assert(z <= SRC_ONE); + assert(w <= SRC_ONE); + return ((reg & ~UREG_XYZW_CHANNEL_MASK) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | + CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); +} + + + +/*********************************************************************** + * Public interface for the compiler + */ +extern void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs); + + + +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, + uint op, + uint dest, + uint mask, + uint saturate, + uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, + float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, + const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, + float c0, float c1, + float c2, float c3); + + +/*====================================================================== + * i915_fpc_debug.c + */ +extern void i915_disassemble_program(const uint * program, uint sz); + + +/*====================================================================== + * i915_fpc_translate.c + */ + +extern void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c new file mode 100644 index 0000000000..b054ce41d3 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -0,0 +1,375 @@ 
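As an illustrative aside, here is a minimal standalone sketch of how the UREG packing and the swizzle() shift trick above fit together. The helper names (make_ureg, swizzle_ureg, PICK, PLACE) are hypothetical stand-ins, not part of the driver; the channel-select codes assume the 0=x, 1=y, 2=z, 3=w, 4=zero, 5=one decoding used by print_reg_neg_swizzle() in i915_debug_fp.c earlier.

#include <assert.h>
#include <stdio.h>

/* Bit layout copied from the UREG_* defines in i915_fpc.h above. */
#define UREG_TYPE_SHIFT         29
#define UREG_NR_SHIFT           24
#define UREG_CHANNEL_X_SHIFT    20
#define UREG_CHANNEL_Y_SHIFT    16
#define UREG_CHANNEL_Z_SHIFT    12
#define UREG_CHANNEL_W_SHIFT     8
#define UREG_CHANNEL_ZERO_SHIFT  4
#define UREG_CHANNEL_ONE_SHIFT   0
#define UREG_XYZW_CHANNEL_MASK   0x00ffff00

/* Channel-select codes (same values the debug decoder prints as x,y,z,w,0,1). */
enum { SEL_X, SEL_Y, SEL_Z, SEL_W, SEL_ZERO, SEL_ONE };

/* Shifting left by sel*4 moves the wanted channel nibble into the X position
 * (bits 23:20); shifting right by slot*4 drops it into the destination slot.
 * This is the same trick swizzle() uses via GET_CHANNEL_SRC/CHANNEL_SRC. */
#define PICK(reg, sel)  (((reg) << ((sel) * 4)) & (0xf << 20))
#define PLACE(v, slot)  ((v) >> ((slot) * 4))

/* Identity ureg: each channel slot selects itself, plus the fixed 0/1 slots. */
static unsigned make_ureg(unsigned type, unsigned nr)
{
   return (type << UREG_TYPE_SHIFT) | (nr << UREG_NR_SHIFT) |
          (SEL_X    << UREG_CHANNEL_X_SHIFT) |
          (SEL_Y    << UREG_CHANNEL_Y_SHIFT) |
          (SEL_Z    << UREG_CHANNEL_Z_SHIFT) |
          (SEL_W    << UREG_CHANNEL_W_SHIFT) |
          (SEL_ZERO << UREG_CHANNEL_ZERO_SHIFT) |
          (SEL_ONE  << UREG_CHANNEL_ONE_SHIFT);
}

static unsigned swizzle_ureg(unsigned reg, unsigned x, unsigned y,
                             unsigned z, unsigned w)
{
   return (reg & ~UREG_XYZW_CHANNEL_MASK) |
          PLACE(PICK(reg, x), 0) |
          PLACE(PICK(reg, y), 1) |
          PLACE(PICK(reg, z), 2) |
          PLACE(PICK(reg, w), 3);
}

int main(void)
{
   /* type/nr values here are placeholders; only the channel fields matter. */
   unsigned src   = make_ureg(1, 5);                      /* 0x25012345 */
   unsigned xxx1  = swizzle_ureg(src, SEL_X, SEL_X, SEL_X, SEL_ONE);

   /* The Y and Z slots now carry the X select; the W slot carries ONE. */
   assert(((xxx1 >> UREG_CHANNEL_Y_SHIFT) & 0xf) == SEL_X);
   assert(((xxx1 >> UREG_CHANNEL_Z_SHIFT) & 0xf) == SEL_X);
   assert(((xxx1 >> UREG_CHANNEL_W_SHIFT) & 0xf) == SEL_ONE);

   printf("src = 0x%08x, src.xxx1 = 0x%08x\n", src, xxx1);
   return 0;
}

Because an identity ureg already stores the constant ZERO and ONE selects in its low two nibbles, selecting a constant 0 or 1 for a component costs nothing extra: the same PICK/PLACE shifts reach those nibbles, which is why the driver's negate() and swizzle() can build masks like .x11w without emitting additional instructions.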
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" +#include "util/u_math.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) +#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) +#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. + */ +#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) +#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ + (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) + + +/* Macros for translating UREG's into the various register fields used + * by the I915 programmable unit. + */ +#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) +#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) +#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) +#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) + +#define UREG_MASK 0xffffff00 +#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ + (REG_NR_MASK << UREG_NR_SHIFT)) + + +uint +i915_get_temp(struct i915_fp_compile *p) +{ + int bit = ffs(~p->temp_flag); + if (!bit) { + i915_program_error(p, "i915_get_temp: out of temporaries\n"); + return 0; + } + + p->temp_flag |= 1 << (bit - 1); + return bit - 1; +} + + +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ + p->temp_flag &= ~(1 << reg); +} + + +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. 
+ */ +uint +i915_get_utemp(struct i915_fp_compile * p) +{ + int bit = ffs(~p->utemp_flag); + if (!bit) { + i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + return 0; + } + + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ + p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags) +{ + uint reg = UREG(type, nr); + + if (type == REG_TYPE_T) { + if (p->decl_t & (1 << nr)) + return reg; + + p->decl_t |= (1 << nr); + } + else if (type == REG_TYPE_S) { + if (p->decl_s & (1 << nr)) + return reg; + + p->decl_s |= (1 << nr); + } + else + return reg; + + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + + p->nr_decl_insn++; + return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, + uint op, + uint dest, + uint mask, + uint saturate, uint src0, uint src1, uint src2) +{ + uint c[3]; + uint nr_const = 0; + + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; + + /* Recursively call this function to MOV additional const values + * into temporary registers. Use utemp registers for this - + * currently shouldn't be possible to run out, but keep an eye on + * this. + */ + if (nr_const > 1) { + uint s[3], first, i, old_utemp_flag; + + s[0] = src0; + s[1] = src1; + s[2] = src2; + old_utemp_flag = p->utemp_flag; + + first = GET_UREG_NR(s[c[0]]); + for (i = 1; i < nr_const; i++) { + if (GET_UREG_NR(s[c[i]]) != first) { + uint tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + s[c[i]], 0, 0); + s[c[i]] = tmp; + } + } + + src0 = s[0]; + src1 = s[1]; + src2 = s[2]; + p->utemp_flag = old_utemp_flag; /* restore */ + } + + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + + p->nr_alu_insn++; + return dest; +} + + +/** + * Emit a texture load or texkill instruction. + * \param dest the dest i915 register + * \param destmask the dest register writemask + * \param sampler the i915 sampler register + * \param coord the i915 source texcoord operand + * \param opcode the instruction opcode + */ +uint i915_emit_texld( struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, + uint coord, + uint opcode ) +{ + const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + + if (coord != k) { + /* texcoord is swizzled or negated. Need to allocate a new temporary + * register (a utemp / unpreserved temp) won't do. + */ + uint tempReg; + + temp = i915_get_temp(p); /* get temp reg index */ + tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + + i915_emit_arith( p, A0_MOV, + tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ + 0, /* saturate */ + coord, 0, 0 ); /* src0, src1, src2 */ + + /* new src texcoord is tempReg */ + coord = tempReg; + } + + /* Don't worry about saturate as we only support + */ + if (destmask != A0_DEST_CHANNEL_ALL) { + /* if not writing to XYZW... 
*/ + uint tmp = i915_get_utemp(p); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); + i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); + /* XXX release utemp here? */ + } + else { + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + + /* is the sampler coord a texcoord input reg? */ + if (GET_UREG_TYPE(coord) != REG_TYPE_T) { + p->nr_tex_indirect++; + } + + *(p->csr++) = (opcode | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + + p->nr_tex_insn++; + } + + if (temp >= 0) + i915_release_temp(p, temp); + + return dest; +} + + +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + continue; + for (idx = 0; idx < 4; idx++) { + if (!(ifs->constant_flags[reg] & (1 << idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= 1 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const1f: out of constants\n"); + return 0; +} + +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == 0xf || + ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + continue; + for (idx = 0; idx < 3; idx++) { + if (!(ifs->constant_flags[reg] & (3 << idx))) { + ifs->constants[reg][idx + 0] = c0; + ifs->constants[reg][idx + 1] = c1; + ifs->constant_flags[reg] |= 3 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const2f: out of constants\n"); + return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, + float c0, float c1, float c2, float c3) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg; + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == 0xf && + ifs->constants[reg][0] == c0 && + ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && + ifs->constants[reg][3] == c3) { + return UREG(REG_TYPE_CONST, reg); + } + else if (ifs->constant_flags[reg] == 0) { + + ifs->constants[reg][0] = c0; + ifs->constants[reg][1] = c1; + ifs->constants[reg][2] = c2; + ifs->constants[reg][3] = c3; + ifs->constant_flags[reg] = 0xf; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); + } + } + + i915_program_error(p, "i915_emit_const4f: out of constants\n"); + return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} diff --git 
a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c new file mode 100644 index 0000000000..43d62c5176 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -0,0 +1,1190 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdarg.h> + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "draw/draw_vertex.h" + + +/** + * Simple pass-through fragment shader to use when we don't have + * a real shader (or it fails to compile for some reason). + */ +static unsigned passthrough[] = +{ + _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), + + /* declare input color: + */ + (D0_DCL | + (REG_TYPE_T << D0_TYPE_SHIFT) | + (T_DIFFUSE << D0_NR_SHIFT) | + D0_CHANNEL_ALL), + 0, + 0, + + /* move to output color: + */ + (A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)), + 0x01230000, /* .xyzw */ + 0 +}; + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, + -1.0f / (3 * 2 * 1), + 1.0f / (5 * 4 * 3 * 2 * 1), + -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! */ +static const float cos_constants[4] = { 1.0, + -1.0f / (2 * 1), + 1.0f / (4 * 3 * 2 * 1), + -1.0f / (6 * 5 * 4 * 3 * 2 * 1) +}; + + + +/** + * component-wise negation of ureg + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ + /* Another neat thing about the UREG representation */ + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. 
+ */ +static void +i915_use_passthrough_shader(struct i915_fragment_shader *fs) +{ + fs->program = (uint *) MALLOC(sizeof(passthrough)); + if (fs->program) { + memcpy(fs->program, passthrough, sizeof(passthrough)); + fs->program_len = Elements(passthrough); + } + fs->num_constants = 0; +} + + +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ + va_list args; + char buffer[1024]; + + debug_printf("i915_program_error: "); + va_start( args, msg ); + util_vsnprintf( buffer, sizeof(buffer), msg, args ); + va_end( args ); + debug_printf(buffer); + debug_printf("\n"); + + p->error = 1; +} + + + +/** + * Construct a ureg for the given source register. Will emit + * constants, apply swizzling and negation as needed. + */ +static uint +src_vector(struct i915_fp_compile *p, + const struct tgsi_full_src_register *source) +{ + uint index = source->SrcRegister.Index; + uint src = 0, sem_name, sem_ind; + + switch (source->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, index); + break; + case TGSI_FILE_INPUT: + /* XXX: Packing COL1, FOGC into a single attribute works for + * texenv programs, but will fail for real fragment programs + * that use these attributes and expect them to be a full 4 + * components wide. Could use a texcoord to pass these + * attributes if necessary, but that won't work in the general + * case. + * + * We also use a texture coordinate to pass wpos when possible. + */ + + sem_name = p->shader->info.input_semantic_name[index]; + sem_ind = p->shader->info.input_semantic_index[index]; + + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + debug_printf("SKIP SEM POS\n"); + /* + assert(p->wpos_tex != -1); + src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); + */ + break; + case TGSI_SEMANTIC_COLOR: + if (sem_ind == 0) { + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + } + else { + /* secondary color */ + assert(sem_ind == 1); + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + } + break; + case TGSI_SEMANTIC_FOG: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, W, W, W); + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); + break; + default: + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + case TGSI_FILE_IMMEDIATE: + assert(index < p->num_immediates); + index = p->immediates_map[index]; + /* fall-through */ + case TGSI_FILE_CONSTANT: + src = UREG(REG_TYPE_CONST, index); + break; + + default: + i915_program_error(p, "Bad source->File"); + return 0; + } + + if (source->SrcRegister.Extended) { + src = swizzle(src, + source->SrcRegisterExtSwz.ExtSwizzleX, + source->SrcRegisterExtSwz.ExtSwizzleY, + source->SrcRegisterExtSwz.ExtSwizzleZ, + source->SrcRegisterExtSwz.ExtSwizzleW); + } + else { + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); + } + + + /* There's both negate-all-components and per-component negation. + * Try to handle both here. 
+ */ + { + int nx = source->SrcRegisterExtSwz.NegateX; + int ny = source->SrcRegisterExtSwz.NegateY; + int nz = source->SrcRegisterExtSwz.NegateZ; + int nw = source->SrcRegisterExtSwz.NegateW; + if (source->SrcRegister.Negate) { + nx = !nx; + ny = !ny; + nz = !nz; + nw = !nw; + } + src = negate(src, nx, ny, nz, nw); + } + + /* no abs() or post-abs negation */ +#if 0 + /* XXX assertions disabled to allow arbfplight.c to run */ + /* XXX enable these assertions, or fix things */ + assert(!source->SrcRegisterExtMod.Absolute); + assert(!source->SrcRegisterExtMod.Negate); +#endif + return src; +} + + +/** + * Construct a ureg for a destination register. + */ +static uint +get_result_vector(struct i915_fp_compile *p, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + return UREG(REG_TYPE_OD, 0); + case TGSI_SEMANTIC_COLOR: + return UREG(REG_TYPE_OC, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index/semantics"); + return 0; + } + } + case TGSI_FILE_TEMPORARY: + return UREG(REG_TYPE_R, dest->DstRegister.Index); + default: + i915_program_error(p, "Bad inst->DstReg.File"); + return 0; + } +} + + +/** + * Compute flags for saturation and writemask. + */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ + const uint writeMask + = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint flags = 0x0; + + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + flags |= A0_DEST_SATURATE; + + if (writeMask & TGSI_WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (writeMask & TGSI_WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (writeMask & TGSI_WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (writeMask & TGSI_WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; + + return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_1D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_2D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_RECT: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_3D: + return D0_SAMPLE_TYPE_VOLUME; + case TGSI_TEXTURE_CUBE: + return D0_SAMPLE_TYPE_CUBE; + default: + i915_program_error(p, "TexSrc type"); + return 0; + } +} + + +/** + * Generate texel lookup instruction. + */ +static void +emit_tex(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode) +{ + uint texture = inst->InstructionExtTexture.Texture; + uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint tex = translate_tex_src_target( p, texture ); + uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); + uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + + i915_emit_texld( p, + get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_flags( inst ), + sampler, + coord, + opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode the i915 opcode + * \param numArgs the number of input/src arguments + */ +static void +emit_simple_arith(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + uint arg1, arg2, arg3; + + assert(numArgs <= 3); + + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); + arg3 = (numArgs < 3) ? 
0 : src_vector( p, &inst->FullSrcRegisters[2] ); + + i915_emit_arith( p, + opcode, + get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_flags( inst ), 0, + arg1, + arg2, + arg3 ); +} + + +/** As above, but swap the first two src regs */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + struct tgsi_full_instruction inst2; + + assert(numArgs == 2); + + /* transpose first two registers */ + inst2 = *inst; + inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; + inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + + emit_simple_arith(p, &inst2, opcode, numArgs); +} + + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/* + * Translate TGSI instruction to i915 instruction. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT -- results seem a little different to sw mesa + * LOG -- different to mesa on negative numbers, but this is conformant. + */ +static void +i915_translate_instruction(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst) +{ + uint writemask; + uint src0, src1, src2, flags; + uint tmp = 0; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + i915_emit_arith(p, + A0_MAX, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src0, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_ADD: + emit_simple_arith(p, inst, A0_ADD, 2); + break; + + case TGSI_OPCODE_CMP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), + 0, src0, src2, src1); /* NOTE: order of src2, src1 */ + break; + + case TGSI_OPCODE_COS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 + * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * result = DP4 t0, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, Y, X, ONE), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + break; + + case TGSI_OPCODE_DP3: + emit_simple_arith(p, inst, A0_DP3, 2); + break; + + case TGSI_OPCODE_DP4: + emit_simple_arith(p, inst, A0_DP4, 2); + break; + + case TGSI_OPCODE_DPH: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + 
get_result_flags(inst), 0, + swizzle(src0, X, Y, Z, ONE), src1, 0); + break; + + case TGSI_OPCODE_DST: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + */ + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, ONE, Y, Z, ONE), + swizzle(src1, ONE, Y, ONE, W), 0); + break; + + case TGSI_OPCODE_END: + /* no-op */ + break; + + case TGSI_OPCODE_EX2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_FLR: + emit_simple_arith(p, inst, A0_FLR, 1); + break; + + case TGSI_OPCODE_FRC: + emit_simple_arith(p, inst, A0_FRC, 1); + break; + + case TGSI_OPCODE_KIL: + /* kill if src[0].x < 0 || src[0].y < 0 ... */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_texld(p, + tmp, /* dest reg: a dummy reg */ + A0_DEST_CHANNEL_ALL, /* dest writemask */ + 0, /* sampler */ + src0, /* coord*/ + T0_TEXKILL); /* opcode */ + break; + + case TGSI_OPCODE_KILP: + assert(0); /* not tested yet */ + break; + + case TGSI_OPCODE_LG2: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_LOG, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_LIT: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* tmp = max( a.xyzw, a.00zw ) + * XXX: Clamp tmp.w to -128..128 + * tmp.y = log(tmp.y) + * tmp.y = tmp.w * tmp.y + * tmp.y = exp(tmp.y) + * result = cmp (a.11-x1, a.1x01, a.1xy1 ) + */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, + src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + + i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, ZERO, Y, ZERO, ZERO), + swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + + i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, + swizzle(tmp, Y, Y, Y, Y), 0, 0); + + i915_emit_arith(p, A0_CMP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), + swizzle(tmp, ONE, X, ZERO, ONE), + swizzle(tmp, ONE, X, Y, ONE)); + + break; + + case TGSI_OPCODE_LRP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + src2 = src_vector(p, &inst->FullSrcRegisters[2]); + flags = get_result_flags(inst); + tmp = i915_get_utemp(p); + + /* b*a + c*(1-a) + * + * b*a + c - ca + * + * tmp = b*a + c, + * result = (-c)*a + tmp + */ + i915_emit_arith(p, A0_MAD, tmp, + flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + + i915_emit_arith(p, A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); + break; + + case TGSI_OPCODE_MAD: + emit_simple_arith(p, inst, A0_MAD, 3); + break; + + case TGSI_OPCODE_MAX: + emit_simple_arith(p, inst, A0_MAX, 2); + break; + + case TGSI_OPCODE_MIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + i915_emit_arith(p, + A0_MAX, + tmp, flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 
1, 1), + negate(src1, 1, 1, 1, 1), 0); + + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + emit_simple_arith(p, inst, A0_MOV, 1); + break; + + case TGSI_OPCODE_MUL: + emit_simple_arith(p, inst, A0_MUL, 2); + break; + + case TGSI_OPCODE_POW: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + + /* XXX: masking on intermediate values, here and elsewhere. + */ + i915_emit_arith(p, + A0_LOG, + tmp, A0_DEST_CHANNEL_X, 0, + swizzle(src0, X, X, X, X), 0, 0); + + i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + + i915_emit_arith(p, + A0_EXP, + get_result_vector(p, &inst->FullDstRegisters[0]), + flags, 0, swizzle(tmp, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RET: + /* XXX: no-op? */ + break; + + case TGSI_OPCODE_RCP: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RCP, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_RSQ: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + + i915_emit_arith(p, + A0_RSQ, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, X, X, X, X), 0, 0); + break; + + case TGSI_OPCODE_SCS: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * scs.x = DP4 t1, sin_constants + * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 + * scs.y = DP4 t1, cos_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(src0, X, X, ONE, ONE), + swizzle(src0, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + if (writemask & TGSI_WRITEMASK_Y) { + uint tmp1; + + if (writemask & TGSI_WRITEMASK_X) + tmp1 = i915_get_utemp(p); + else + tmp1 = tmp; + + i915_emit_arith(p, + A0_MUL, + tmp1, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_Y, 0, + swizzle(tmp1, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + } + + if (writemask & TGSI_WRITEMASK_X) { + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XYZ, 0, + swizzle(tmp, X, X, Z, ONE), + swizzle(tmp, Z, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + A0_DEST_CHANNEL_X, 0, + swizzle(tmp, ONE, Z, Y, X), + i915_emit_const4fv(p, cos_constants), 0); + } + break; + + case TGSI_OPCODE_SGE: + emit_simple_arith(p, inst, A0_SGE, 2); + break; + + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2); + break; + + case TGSI_OPCODE_SIN: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + + i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + + /* By choosing different taylor constants, could get rid of this mul: + */ + 
i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_X, 0, + tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + + /* + * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 + * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x + * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x + * result = DP4 t1.wzyx, sin_constants + */ + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_XY, 0, + swizzle(tmp, X, X, ONE, ONE), + swizzle(tmp, X, ONE, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, X, Y), + swizzle(tmp, X, X, ONE, ONE), 0); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(tmp, X, Y, Y, W), + swizzle(tmp, X, Z, ONE, ONE), 0); + + i915_emit_arith(p, + A0_DP4, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(tmp, W, Z, Y, X), + i915_emit_const4fv(p, sin_constants), 0); + break; + + case TGSI_OPCODE_SLT: + emit_simple_arith(p, inst, A0_SLT, 2); + break; + + case TGSI_OPCODE_SGT: + /* like SLT, but swap reg0, reg1 */ + emit_simple_arith_swap2(p, inst, A0_SLT, 2); + break; + + case TGSI_OPCODE_SUB: + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + src0, negate(src1, 1, 1, 1, 1), 0); + break; + + case TGSI_OPCODE_TEX: + emit_tex(p, inst, T0_TEXLD); + break; + + case TGSI_OPCODE_TXB: + emit_tex(p, inst, T0_TEXLDB); + break; + + case TGSI_OPCODE_TXP: + emit_tex(p, inst, T0_TEXLDP); + break; + + case TGSI_OPCODE_XPD: + /* Cross product: + * result.x = src0.y * src1.z - src0.z * src1.y; + * result.y = src0.z * src1.x - src0.x * src1.z; + * result.z = src0.x * src1.y - src0.y * src1.x; + * result.w = undef; + */ + src0 = src_vector(p, &inst->FullSrcRegisters[0]); + src1 = src_vector(p, &inst->FullSrcRegisters[1]); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_MUL, + tmp, A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, Z, X, Y, ONE), + swizzle(src1, Y, Z, X, ONE), 0); + + i915_emit_arith(p, + A0_MAD, + get_result_vector(p, &inst->FullDstRegisters[0]), + get_result_flags(inst), 0, + swizzle(src0, Y, Z, X, ONE), + swizzle(src1, Z, X, Y, ONE), + negate(tmp, 1, 1, 1, 0)); + break; + + default: + i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); + p->error = 1; + return; + } + + i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. 
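+ * Walks the TGSI token stream, recording declarations and immediates
+ * and emitting hardware code for each instruction.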
+ * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct tgsi_token *tokens) +{ + struct i915_fragment_shader *ifs = p->shader; + struct tgsi_parse_context parse; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } + } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + i++) { + assert(i < I915_MAX_TEMPORARY); + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &parse.FullToken.FullImmediate; + const uint pos = p->num_immediates++; + uint j; + for (j = 0; j < imm->Immediate.Size; j++) { + p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + + i915_translate_instruction(p, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + + } /* while */ + + tgsi_parse_free (&parse); +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, + struct i915_fragment_shader *ifs) +{ + struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + + p->shader = ifs; + + /* Put new constants at end of const buffer, growing downward. + * The problem is we don't know how many user-defined constants might + * be specified with pipe->set_constant_buffer(). + * Should pre-scan the user's program to determine the highest-numbered + * constant referenced. + */ + ifs->num_constants = 0; + memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + + p->first_instruction = TRUE; + + p->nr_tex_indirect = 1; /* correct? */ + p->nr_tex_insn = 0; + p->nr_alu_insn = 0; + p->nr_decl_insn = 0; + + p->csr = p->program; + p->decl = p->declarations; + p->decl_s = 0; + p->decl_t = 0; + p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; + p->utemp_flag = ~0x7; + + p->wpos_tex = -1; + + /* initialize the first program word */ + *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + + return p; +} + + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. 
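+ * Also checks the instruction-count limits; if anything went wrong the
+ * shader is replaced with the passthrough shader.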
+ */ +static void +i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned long program_size = (unsigned long) (p->csr - p->program); + unsigned long decl_size = (unsigned long) (p->decl - p->declarations); + + if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) + i915_program_error(p, "Exceeded max nr indirect texture lookups"); + + if (p->nr_tex_insn > I915_MAX_TEX_INSN) + i915_program_error(p, "Exceeded max TEX instructions"); + + if (p->nr_alu_insn > I915_MAX_ALU_INSN) + i915_program_error(p, "Exceeded max ALU instructions"); + + if (p->nr_decl_insn > I915_MAX_DECL_INSN) + i915_program_error(p, "Exceeded max DECL instructions"); + + if (p->error) { + p->NumNativeInstructions = 0; + p->NumNativeAluInstructions = 0; + p->NumNativeTexInstructions = 0; + p->NumNativeTexIndirections = 0; + + i915_use_passthrough_shader(ifs); + } + else { + p->NumNativeInstructions + = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; + p->NumNativeAluInstructions = p->nr_alu_insn; + p->NumNativeTexInstructions = p->nr_tex_insn; + p->NumNativeTexIndirections = p->nr_tex_indirect; + + /* patch in the program length */ + p->declarations[0] |= program_size + decl_size - 2; + + /* Copy compilation results to fragment program struct: + */ + assert(!ifs->program); + ifs->program + = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); + if (ifs->program) { + ifs->program_len = program_size + decl_size; + + memcpy(ifs->program, + p->declarations, + decl_size * sizeof(uint)); + + memcpy(ifs->program + decl_size, + p->program, + program_size * sizeof(uint)); + } + } + + /* Release the compilation struct: + */ + FREE(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). + */ +static void +i915_find_wpos_space(struct i915_fp_compile *p) +{ +#if 0 + const uint inputs + = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ + uint i; + + p->wpos_tex = -1; + + if (inputs & (1 << TGSI_ATTRIB_POS)) { + for (i = 0; i < I915_TEX_UNITS; i++) { + if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { + p->wpos_tex = i; + return; + } + } + + i915_program_error(p, "No free texcoord for wpos value"); + } +#else + if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + /* frag shader using the fragment position input */ +#if 0 + assert(0); +#endif + } +#endif +} + + + + +/** + * Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +static void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ + /* XXX assuming pos/depth is always in output[0] */ + if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + const uint depth = UREG(REG_TYPE_OD, 0); + + i915_emit_arith(p, + A0_MOV, /* opcode */ + depth, /* dest reg */ + A0_DEST_CHANNEL_W, /* write mask */ + 0, /* saturate? 
*/ + swizzle(depth, X, Y, Z, Z), /* src0 */ + 0, 0 /* src1, src2 */); + } +} + + +void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs) +{ + struct i915_fp_compile *p = i915_init_compile(i915, fs); + const struct tgsi_token *tokens = fs->state.tokens; + + i915_find_wpos_space(p); + +#if 0 + tgsi_dump(tokens, 0); +#endif + + i915_translate_instructions(p, tokens); + i915_fixup_depth_write(p); + + i915_fini_compile(i915, p); +} diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c new file mode 100644 index 0000000000..8f1f58b2dd --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "draw/draw_pipe.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_batch.h" + + + +/** + * Primitive emit to hardware. No support for vertex buffers or any + * nice fast paths. + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct i915_context *i915; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +/** + * Extract the needed fields from vertex_header and emit i915 dwords. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. 
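+ * Each attribute is emitted in the format chosen by the current
+ * vertex_info (EMIT_1F .. EMIT_4UB), so the number of dwords written
+ * here must equal vinfo->size.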
+ */ +static INLINE void +emit_hw_vertex( struct i915_context *i915, + const struct vertex_header *vertex) +{ + const struct vertex_info *vinfo = &i915->current.vertex_info; + uint i; + uint count = 0; /* for debug/sanity */ + + assert(!i915->dirty); + + for (i = 0; i < vinfo->num_attribs; i++) { + const uint j = vinfo->attrib[i].src_index; + const float *attrib = vertex->data[j]; + switch (vinfo->attrib[i].emit) { + case EMIT_1F: + OUT_BATCH( fui(attrib[0]) ); + count++; + break; + case EMIT_2F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + count += 2; + break; + case EMIT_3F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + count += 3; + break; + case EMIT_4F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + OUT_BATCH( fui(attrib[3]) ); + count += 4; + break; + case EMIT_4UB: + OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), + float_to_ubyte( attrib[1] ), + float_to_ubyte( attrib[0] ), + float_to_ubyte( attrib[3] )) ); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +} + + + +static INLINE void +emit_prim( struct draw_stage *stage, + struct prim_header *prim, + unsigned hwprim, + unsigned nr ) +{ + struct i915_context *i915 = setup_stage(stage)->i915; + unsigned vertex_size; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + /* need to do this after validation! */ + vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + assert(0); + return; + } + } + + /* Emit each triangle as a single primitive. I told you this was + * simple. + */ + OUT_BATCH(_3DPRIMITIVE | + hwprim | + ((4 + vertex_size * nr)/4 - 2)); + + for (i = 0; i < nr; i++) + emit_hw_vertex(i915, prim->v[i]); +} + + +static void +setup_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); +} + + +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); +} + + +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); +} + + +static void setup_flush( struct draw_stage *stage, unsigned flags ) +{ +} + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. This gets plugged into + * the 'draw' module's pipeline. 
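+ * Each point, line or triangle is emitted as a separate inline
+ * _3DPRIMITIVE packet (see emit_prim above).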
+ */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->i915 = i915; + setup->stage.draw = i915->draw; + setup->stage.point = setup_point; + setup->stage.line = setup_line; + setup->stage.tri = setup_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + return &setup->stage; +} diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c new file mode 100644 index 0000000000..a8e97e7c30 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -0,0 +1,545 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_state.h" + + +/** + * Primitive renderer for i915. + */ +struct i915_vbuf_render { + struct vbuf_render base; + + struct i915_context *i915; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Software primitive */ + unsigned prim; + + /** Hardware primitive */ + unsigned hwprim; + + /** Genereate a vertex list */ + unsigned fallback; + + /* Stuff for the vbo */ + struct pipe_buffer *vbo; + size_t vbo_size; + size_t vbo_offset; + void *vbo_ptr; + size_t vbo_alloc_size; +}; + + +/** + * Basically a cast wrapper. 
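+ * (The vbuf_render base class is the first member of i915_vbuf_render,
+ * so the downcast is safe.)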
+ */ +static INLINE struct i915_vbuf_render * +i915_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct i915_vbuf_render *)render; +} + + +static const struct vertex_info * +i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915->dirty) { + /* make sure we have up to date vertex layout */ + i915_update_derived( i915 ); + } + + return &i915->current.vertex_info; +} + + +static void * +i915_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_screen *screen = i915->pipe.screen; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + /* FIXME: handle failure */ + assert(!i915->vbo); + + if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { + } else { + i915->vbo_flushed = 0; + pipe_buffer_reference(screen, &i915_render->vbo, NULL); + } + + if (!i915_render->vbo) { + i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); + i915_render->vbo_offset = 0; + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); + } + + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_offset; + i915->dirty |= I915_NEW_VBO; + + return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; +} + + +static boolean +i915_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + i915_render->prim = prim; + + switch(prim) { + case PIPE_PRIM_POINTS: + i915_render->hwprim = PRIM3D_POINTLIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINES: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINE_LOOP: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = PIPE_PRIM_LINE_LOOP; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + i915_render->hwprim = PRIM3D_LINESTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLES: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + i915_render->hwprim = PRIM3D_TRISTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_FAN: + i915_render->hwprim = PRIM3D_TRIFAN; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_QUADS: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUADS; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUAD_STRIP; + return TRUE; + case PIPE_PRIM_POLYGON: + i915_render->hwprim = PRIM3D_POLY; + i915_render->fallback = 0; + return TRUE; + default: + /* FIXME: Actually, can handle a lot more just fine... 
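+       * e.g. by generating indices the way quads and line loops are
+       * handled above.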
*/ + return FALSE; + } +} + + + +/** + * Used for fallbacks in draw_arrays + */ +static void +draw_arrays_generate_indices( struct vbuf_render *render, + unsigned start, uint nr, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + unsigned end = start + nr; + switch(type) { + case 0: + for (i = start; i+1 < end; i += 2) + OUT_BATCH( (i+0) | (i+1) << 16 ); + if (i < end) + OUT_BATCH( i ); + break; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) { + for (i = start + 1; i < end; i++) + OUT_BATCH( (i-0) | (i+0) << 16 ); + OUT_BATCH( (i-0) | ( start) << 16 ); + } + break; + case PIPE_PRIM_QUADS: + for (i = start; i + 3 < end; i += 4) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+1) << 16 ); + OUT_BATCH( (i+2) | (i+3) << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = start; i + 3 < end; i += 2) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+2) << 16 ); + OUT_BATCH( (i+0) | (i+3) << 16 ); + } + break; + default: + assert(0); + } +} + +static unsigned +draw_arrays_calc_nr_indices( uint nr, unsigned type ) +{ + switch (type) { + case 0: + return nr; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) + return nr * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +draw_arrays_fallback( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned nr_indices; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback ); + if (!nr_indices) + return; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + goto out; + } + } + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + + draw_arrays_generate_indices( render, start, nr, i915_render->fallback ); + +out: + return; +} + +static void +i915_vbuf_render_draw_arrays( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (i915_render->fallback) { + draw_arrays_fallback( render, start, nr ); + return; + } + + /* JB: TODO submit direct cmds */ + draw_arrays_fallback( render, start, nr ); +} + +/** + * Used for normal and fallback emitting of indices + * If type is zero normal operation assumed. 
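+ * Otherwise the incoming indices are rewritten so that line loops,
+ * quads and quad strips come out as the hardware primitive selected
+ * in set_primitive.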
+ */ +static void +draw_generate_indices( struct vbuf_render *render, + const ushort *indices, + uint nr_indices, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + switch(type) { + case 0: + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH( indices[i] | indices[i+1] << 16 ); + } + if (i < nr_indices) { + OUT_BATCH( indices[i] ); + } + break; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) { + for (i = 1; i < nr_indices; i++) + OUT_BATCH( indices[i-1] | indices[i] << 16 ); + OUT_BATCH( indices[i-1] | indices[0] << 16 ); + } + break; + case PIPE_PRIM_QUADS: + for (i = 0; i + 3 < nr_indices; i += 4) { + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+2] | indices[i+3] << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i + 3 < nr_indices; i += 2) { + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+2] << 16 ); + OUT_BATCH( indices[i+0] | indices[i+3] << 16 ); + } + break; + default: + assert(0); + break; + } +} + +static unsigned +draw_calc_nr_indices( uint nr_indices, unsigned type ) +{ + switch (type) { + case 0: + return nr_indices; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) + return nr_indices * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr_indices / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr_indices - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +i915_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned save_nr_indices; + + save_nr_indices = nr_indices; + + nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); + if (!nr_indices) + return; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + goto out; + } + } + + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + draw_generate_indices( render, + indices, + save_nr_indices, + i915_render->fallback ); + +out: + return; +} + + +static void +i915_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + size_t size = (size_t)vertex_size * (size_t)vertices_used; + + assert(i915->vbo); + + i915_render->vbo_offset += size; + i915->vbo = NULL; + i915->dirty |= I915_NEW_VBO; +} + + +static void +i915_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + FREE(i915_render); +} + + +/** + * Create a new primitive render. 
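+ * Fills in the vbuf_render vtable and pre-allocates the vertex buffer
+ * that hardware vertices are built into.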
+ */ +static struct vbuf_render * +i915_vbuf_render_create( struct i915_context *i915 ) +{ + struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + struct pipe_screen *screen = i915->pipe.screen; + + i915_render->i915 = i915; + + i915_render->base.max_vertex_buffer_bytes = 128*1024; + + /* NOTE: it must be such that state and vertices indices fit in a single + * batch buffer. + */ + i915_render->base.max_indices = 16*1024; + + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; + i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; + i915_render->base.set_primitive = i915_vbuf_render_set_primitive; + i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; + i915_render->base.release_vertices = i915_vbuf_render_release_vertices; + i915_render->base.destroy = i915_vbuf_render_destroy; + + i915_render->vbo_alloc_size = 128 * 4096; + i915_render->vbo_size = i915_render->vbo_alloc_size; + i915_render->vbo_offset = 0; + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); + + return &i915_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = i915_vbuf_render_create(i915); + if(!render) + return NULL; + + stage = draw_vbuf_stage( i915->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + /** TODO JB: this shouldn't be here */ + draw_set_render(i915->draw, render); + + return stage; +} diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h new file mode 100644 index 0000000000..04620fec68 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_reg.h @@ -0,0 +1,978 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef I915_REG_H +#define I915_REG_H + + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + + + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + + + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | 
(0x98<<16)) + + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + + +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLOR_BUF_8BIT 0 +#define COLOR_BUF_RGB555 (1<<8) +#define COLOR_BUF_RGB565 (2<<8) +#define COLOR_BUF_ARGB8888 (3<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + + + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define 
_3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + + + + + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) 
+#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + + + +#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT) +#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT) +#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT) +#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT) + + + + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ + +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ +#define _3DSTATE_MAP_PALETTE_LOAD_32 (CMD_3D|(0x1d<<24)|(0x8f<<16)) +/* subsequent dwords up to length (max 16) are ARGB8888 color values */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + 
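+/* Dword 1 is a PS1_REG() mask of constant registers to load; each
+ * selected register is followed by its four float components (X,Y,Z,W).
+ */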
+/* p222 */ + + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 
1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + + + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + + +/* p207 */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT 
(0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244 */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16)) +#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16)) +#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + + +#define MI_FLUSH ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) + + +#define CMD_3D (0x3<<29) + + +#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define I915PACKCOLOR4444(r,g,b,a) \ + ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) + +#define I915PACKCOLOR1555(r,g,b,a) \ + ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ + ((a) ? 
0x8000 : 0)) + +#define I915PACKCOLOR565(r,g,b) \ + ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) + +#define I915PACKCOLOR8888(r,g,b,a) \ + ((a<<24) | (r<<16) | (g<<8) | b) + + + + +#define BR00_BITBLT_CLIENT 0x40000000 +#define BR00_OP_COLOR_BLT 0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN 0x80000000 + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define MI_WAIT_FOR_EVENT ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_BATCH_BUFFER (0x30<<23) +#define MI_BATCH_BUFFER_START (0x31<<23) +#define MI_BATCH_BUFFER_END (0xa<<23) + + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c new file mode 100644 index 0000000000..1c976082df --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -0,0 +1,284 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_string.h" + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_screen.h" +#include "i915_texture.h" + + +static const char * +i915_get_vendor( struct pipe_screen *pscreen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +i915_get_name( struct pipe_screen *pscreen ) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_screen(pscreen)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); + return buffer; +} + + +static int +i915_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +i915_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static boolean 
+i915_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = surface_supported; + else + list = tex_supported; + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +static void +i915_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + +static void * +i915_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + char *map = pipe_buffer_map( screen, surface->buffer, flags ); + if (map == NULL) + return NULL; + + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify contexts of a texture change. + */ + /* i915_screen(screen)->timestamp++; */ + } + + return map + surface->offset; +} + +static void +i915_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + pipe_buffer_unmap( screen, surface->buffer ); +} + + + +/** + * Create a new i915_screen object + */ +struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct i915_screen *i915screen = CALLOC_STRUCT(i915_screen); + + if (!i915screen) + return NULL; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + i915screen->is_i945 = FALSE; + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + i915screen->is_i945 = TRUE; + break; + + default: + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + i915screen->pci_id = pci_id; + + i915screen->screen.winsys = winsys; + + i915screen->screen.destroy = i915_destroy_screen; + + i915screen->screen.get_name = i915_get_name; + i915screen->screen.get_vendor = i915_get_vendor; + i915screen->screen.get_param = i915_get_param; + i915screen->screen.get_paramf = i915_get_paramf; + i915screen->screen.is_format_supported = i915_is_format_supported; + i915screen->screen.surface_map = i915_surface_map; + i915screen->screen.surface_unmap = i915_surface_unmap; + + i915_init_screen_texture_functions(&i915screen->screen); + + return &i915screen->screen; +} diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h new file mode 100644 index 0000000000..73b0ff05ce --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_SCREEN_H +#define I915_SCREEN_H + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct i915_screen +{ + struct pipe_screen screen; + + boolean is_i945; + uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct i915_screen * +i915_screen(struct pipe_screen *pscreen) +{ + return (struct i915_screen *) pscreen; +} + + +extern struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c new file mode 100644 index 0000000000..d2487d8277 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -0,0 +1,788 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "draw/draw_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_state_inlines.h" +#include "i915_fpc.h" + +/* The i915 (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; +// case PIPE_TEX_WRAP_MIRRORED_REPEAT: +// return TEXCOORDMODE_MIRROR; + default: + return TEXCOORDMODE_WRAP; + } +} + +static unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return FILTER_ANISOTROPIC; + default: + assert(0); + return FILTER_NEAREST; + } +} + +static unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return MIPFILTER_LINEAR; + default: + assert(0); + return MIPFILTER_NONE; + } +} + + +/* None of this state is actually used for anything yet. + */ +static void * +i915_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); + + { + unsigned eqRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + unsigned eqA = blend->alpha_func; + unsigned srcA = blend->alpha_src_factor; + unsigned dstA = blend->alpha_dst_factor; + + /* Special handling for MIN/MAX filter modes handled at + * state_tracker level. 
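+       *
+       * Illustrative note on the code below: the independent alpha blend
+       * (IAB) word is only fully programmed when the alpha func/factors
+       * differ from the RGB ones (e.g. srcRGB = SRC_ALPHA but srcA = ONE);
+       * otherwise it carries just IAB_MODIFY_ENABLE, which switches
+       * independent alpha blending off.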
+ */ + + if (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB) { + + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_ENABLE | + IAB_MODIFY_FUNC | + IAB_MODIFY_SRC_FACTOR | + IAB_MODIFY_DST_FACTOR | + SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | + DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | + (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); + } + else { + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + 0); + } + } + + cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); + + if (blend->logicop_enable) + cso_data->LIS5 |= S5_LOGICOP_ENABLE; + + if (blend->dither) + cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + + if ((blend->colormask & PIPE_MASK_R) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_RED; + + if ((blend->colormask & PIPE_MASK_G) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; + + if ((blend->colormask & PIPE_MASK_B) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; + + if ((blend->colormask & PIPE_MASK_A) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; + + if (blend->blend_enable) { + unsigned funcRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | + SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | + DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | + (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); + } + + return cso_data; +} + +static void i915_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->blend = (struct i915_blend_state*)blend; + + i915->dirty |= I915_NEW_BLEND; +} + + +static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void i915_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->blend_color = *blend_color; + + i915->dirty |= I915_NEW_BLEND; +} + +static void * +i915_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); + const unsigned ws = sampler->wrap_s; + const unsigned wt = sampler->wrap_t; + const unsigned wr = sampler->wrap_r; + unsigned minFilt, magFilt; + unsigned mipFilt; + + cso->templ = sampler; + + mipFilt = translate_mip_filter(sampler->min_mip_filter); + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; + } + + { + int b = (int) (sampler->lod_bias * 16.0); + b = CLAMP(b, -256, 255); + cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); + } + + /* Shadow: + */ + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + { + cso->state[0] |= (SS2_SHADOW_ENABLE | + i915_translate_compare_func(sampler->compare_func)); + + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; + } + + cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | + (mipFilt << SS2_MIP_FILTER_SHIFT) | + (magFilt << SS2_MAG_FILTER_SHIFT)); + + cso->state[1] |= + ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); + + if 
(sampler->normalized_coords) + cso->state[1] |= SS3_NORMALIZED_COORDS; + + { + int minlod = (int) (16.0 * sampler->min_lod); + int maxlod = (int) (16.0 * sampler->max_lod); + minlod = CLAMP(minlod, 0, 16 * 11); + maxlod = CLAMP(maxlod, 0, 16 * 11); + + if (minlod > maxlod) + maxlod = minlod; + + cso->minlod = minlod; + cso->maxlod = maxlod; + } + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + cso->state[2] = I915PACKCOLOR8888(r, g, b, a); + } + return cso; +} + +static void i915_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct i915_context *i915 = i915_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_samplers && + !memcmp(i915->sampler, sampler, num * sizeof(void *))) + return; + + draw_flush(i915->draw); + + for (i = 0; i < num; ++i) + i915->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + i915->sampler[i] = NULL; + + i915->num_samplers = num; + + i915->dirty |= I915_NEW_SAMPLER; +} + +static void i915_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. + */ + +static void * +i915_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); + + { + int testmask = depth_stencil->stencil[0].value_mask & 0xff; + int writemask = depth_stencil->stencil[0].write_mask & 0xff; + + cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(testmask) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(writemask)); + } + + if (depth_stencil->stencil[0].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[0].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); + int ref = depth_stencil->stencil[0].ref_value & 0xff; + + cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE | + (ref << S5_STENCIL_REF_SHIFT) | + (test << S5_STENCIL_TEST_FUNC_SHIFT) | + (fop << S5_STENCIL_FAIL_SHIFT) | + (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); + } + + if (depth_stencil->stencil[1].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[1].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); + int ref = depth_stencil->stencil[1].ref_value & 0xff; + int tmask = depth_stencil->stencil[1].value_mask & 0xff; + int wmask = depth_stencil->stencil[1].write_mask & 0xff; + + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_FUNCS | + BFO_ENABLE_STENCIL_TWO_SIDE | + BFO_ENABLE_STENCIL_REF | + BFO_STENCIL_TWO_SIDE | + (ref << BFO_STENCIL_REF_SHIFT) | + (test << BFO_STENCIL_TEST_SHIFT) | + (fop << BFO_STENCIL_FAIL_SHIFT) | + (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); + + cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS 
| + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | + (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); + } + else { + /* This actually disables two-side stencil: The bit set is a + * modify-enable bit to indicate we are changing the two-side + * setting. Then there is a symbolic zero to show that we are + * setting the flag to zero/off. + */ + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_TWO_SIDE | + 0); + cso->bfo[1] = 0; + } + + if (depth_stencil->depth.enabled) { + int func = i915_translate_compare_func(depth_stencil->depth.func); + + cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | + (func << S6_DEPTH_TEST_FUNC_SHIFT)); + + if (depth_stencil->depth.writemask) + cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; + } + + if (depth_stencil->alpha.enabled) { + int test = i915_translate_compare_func(depth_stencil->alpha.func); + ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref); + + cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | + (test << S6_ALPHA_TEST_FUNC_SHIFT) | + (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); + } + + return cso; +} + +static void i915_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; + + i915->dirty |= I915_NEW_DEPTH_STENCIL; +} + +static void i915_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + +static void i915_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + memcpy( &i915->scissor, scissor, sizeof(*scissor) ); + i915->dirty |= I915_NEW_SCISSOR; +} + + +static void i915_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + + +static void * +i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); + if (!ifs) + return NULL; + + ifs->state.tokens = tgsi_dup_tokens(templ->tokens); + + tgsi_scan_shader(templ->tokens, &ifs->info); + + /* The shader's compiled to i915 instructions here */ + i915_translate_fragment_program(i915, ifs); + + return ifs; +} + +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->fs = (struct i915_fragment_shader*) shader; + + i915->dirty |= I915_NEW_FS; +} + +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + + if (ifs->program) + FREE(ifs->program); + ifs->program_len = 0; + + FREE((struct tgsi_token *)ifs->state.tokens); + + FREE(ifs); +} + + +static void * +i915_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + return draw_create_vertex_shader(i915->draw, templ); +} + +static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + + i915->dirty |= I915_NEW_VS; +} + +static void 
i915_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + draw_flush(i915->draw); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* Make a copy of shader constants. + * During fragment program translation we may add additional + * constants to the array. + * + * We want to consider the situation where some user constants + * (ex: a material color) may change frequently but the shader program + * stays the same. In that case we should only be updating the first + * N constants, leaving any extras from shader translation alone. + */ + if (buf) { + void *mapped; + if (buf->size && + (mapped = ws->buffer_map(ws, buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ))) { + memcpy(i915->current.constants[shader], mapped, buf->size); + ws->buffer_unmap(ws, buf->buffer); + i915->current.num_user_constants[shader] + = buf->size / (4 * sizeof(float)); + } + else { + i915->current.num_user_constants[shader] = 0; + } + } + + i915->dirty |= I915_NEW_CONSTANTS; +} + + +static void i915_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct i915_context *i915 = i915_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_textures && + !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + /* Fixes wrong texture in texobj with VBUF */ + draw_flush(i915->draw); + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + texture[i]); + + for (i = num; i < i915->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + NULL); + + i915->num_textures = num; + + i915->dirty |= I915_NEW_TEXTURE; +} + + + +static void i915_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->framebuffer = *fb; /* struct copy */ + + i915->dirty |= I915_NEW_FRAMEBUFFER; +} + + + +static void i915_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + draw_set_clip_state(i915->draw, clip); + + i915->dirty |= I915_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void i915_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->viewport = *viewport; /* struct copy */ + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(i915->draw, &i915->viewport); + + i915->dirty |= I915_NEW_VIEWPORT; +} + + +static void * +i915_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); + + cso->templ = rasterizer; + cso->color_interp = rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; + cso->light_twoside = rasterizer->light_twoside; + cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; + cso->ds[1].f = rasterizer->offset_scale; + if (rasterizer->poly_stipple_enable) { + cso->st |= ST1_ENABLE; + } + + if (rasterizer->scissor) + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; + else + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; + + switch (rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + cso->LIS4 |= S4_CULLMODE_NONE; + break; + case PIPE_WINDING_CW: + cso->LIS4 |= S4_CULLMODE_CW; + break; + case PIPE_WINDING_CCW: + cso->LIS4 |= S4_CULLMODE_CCW; + break; + case PIPE_WINDING_BOTH: + cso->LIS4 |= S4_CULLMODE_BOTH; + break; + } + + { + int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); + + cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; + + if (rasterizer->line_smooth) + cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; + } + + { + int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); + + cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; + } + + if (rasterizer->flatshade) { + cso->LIS4 |= (S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); + } + + cso->LIS7 = fui( rasterizer->offset_units ); + + + return cso; +} + +static void i915_bind_rasterizer_state( struct pipe_context *pipe, + void *raster ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->rasterizer = (struct i915_rasterizer_state *)raster; + + /* pass-through to draw module */ + draw_set_rasterizer_state(i915->draw, + (i915->rasterizer ? i915->rasterizer->templ : NULL)); + + i915->dirty |= I915_NEW_RASTERIZER; +} + +static void i915_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + FREE(raster); +} + +static void i915_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); + + memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); + i915->num_vertex_buffers = count; + + /* pass-through to draw module */ + draw_set_vertex_buffers(i915->draw, count, buffers); +} + +static void i915_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. 
+ */ + draw_flush(i915->draw); + + i915->num_vertex_elements = count; + /* pass-through to draw module */ + draw_set_vertex_elements(i915->draw, count, elements); +} + + +static void i915_set_edgeflags(struct pipe_context *pipe, + const unsigned *bitfield) +{ + /* TODO do something here */ +} + +void +i915_init_state_functions( struct i915_context *i915 ) +{ + i915->pipe.set_edgeflags = i915_set_edgeflags; + i915->pipe.create_blend_state = i915_create_blend_state; + i915->pipe.bind_blend_state = i915_bind_blend_state; + i915->pipe.delete_blend_state = i915_delete_blend_state; + + i915->pipe.create_sampler_state = i915_create_sampler_state; + i915->pipe.bind_sampler_states = i915_bind_sampler_states; + i915->pipe.delete_sampler_state = i915_delete_sampler_state; + + i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; + i915->pipe.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; + i915->pipe.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; + + i915->pipe.create_rasterizer_state = i915_create_rasterizer_state; + i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state; + i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state; + i915->pipe.create_fs_state = i915_create_fs_state; + i915->pipe.bind_fs_state = i915_bind_fs_state; + i915->pipe.delete_fs_state = i915_delete_fs_state; + i915->pipe.create_vs_state = i915_create_vs_state; + i915->pipe.bind_vs_state = i915_bind_vs_state; + i915->pipe.delete_vs_state = i915_delete_vs_state; + + i915->pipe.set_blend_color = i915_set_blend_color; + i915->pipe.set_clip_state = i915_set_clip_state; + i915->pipe.set_constant_buffer = i915_set_constant_buffer; + i915->pipe.set_framebuffer_state = i915_set_framebuffer_state; + + i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; + i915->pipe.set_scissor_state = i915_set_scissor_state; + i915->pipe.set_sampler_textures = i915_set_sampler_textures; + i915->pipe.set_viewport_state = i915_set_viewport_state; + i915->pipe.set_vertex_buffers = i915_set_vertex_buffers; + i915->pipe.set_vertex_elements = i915_set_vertex_elements; +} diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h new file mode 100644 index 0000000000..86c6b0027d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_STATE_H +#define I915_STATE_H + +struct i915_context; + + +struct i915_tracked_state { + unsigned dirty; + void (*update)( struct i915_context * ); +}; + +void i915_update_immediate( struct i915_context *i915 ); +void i915_update_dynamic( struct i915_context *i915 ); +void i915_update_derived( struct i915_context *i915 ); +void i915_update_samplers( struct i915_context *i915 ); +void i915_update_textures(struct i915_context *i915); + +void i915_emit_hardware_state( struct i915_context *i915 ); + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c new file mode 100644 index 0000000000..178d4e8781 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "i915_fpc.h" + + + +/** + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. + */ +static void calculate_vertex_layout( struct i915_context *i915 ) +{ + const struct i915_fragment_shader *fs = i915->fs; + const enum interp_mode colorInterp = i915->rasterizer->color_interp; + struct vertex_info vinfo; + boolean texCoords[8], colors[2], fog, needW; + uint i; + int src; + + memset(texCoords, 0, sizeof(texCoords)); + colors[0] = colors[1] = fog = needW = FALSE; + memset(&vinfo, 0, sizeof(vinfo)); + + /* Determine which fragment program inputs are needed. Setup HW vertex + * layout below, in the HW-specific attribute order. 
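+    *
+    * Worked example (illustrative only): a fragment shader reading
+    * COLOR[0] and GENERIC[0] sets needW, so the layout built below is
+    *   position  EMIT_4F  -> hwfmt[0] |= S4_VFMT_XYZW
+    *   color0    EMIT_4UB -> hwfmt[0] |= S4_VFMT_COLOR
+    *   texcoord0 EMIT_4F  -> hwfmt[1] |= TEXCOORDFMT_4D << 0
+    * i.e. a 9-dword hardware vertex.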
+ */ + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + assert(fs->info.input_semantic_index[i] < 2); + colors[fs->info.input_semantic_index[i]] = TRUE; + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + { + const uint unit = fs->info.input_semantic_index[i]; + assert(unit < 8); + texCoords[unit] = TRUE; + needW = TRUE; + } + break; + case TGSI_SEMANTIC_FOG: + fog = TRUE; + break; + default: + assert(0); + } + } + + + /* pos */ + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + if (needW) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo.hwfmt[0] |= S4_VFMT_XYZW; + vinfo.attrib[0].emit = EMIT_4F; + } + else { + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); + vinfo.hwfmt[0] |= S4_VFMT_XYZ; + vinfo.attrib[0].emit = EMIT_3F; + } + + /* hardware point size */ + /* XXX todo */ + + /* primary color */ + if (colors[0]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + + /* secondary color */ + if (colors[1]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + + /* fog coord, not fog blend factor */ + if (fog) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; + } + + /* texcoords */ + for (i = 0; i < 8; i++) { + uint hwtc; + if (texCoords[i]) { + hwtc = TEXCOORDFMT_4D; + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + } + else { + hwtc = TEXCOORDFMT_NOT_PRESENT; + } + vinfo.hwfmt[1] |= hwtc << (i * 4); + } + + draw_compute_vertex_size(&vinfo); + + if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { + /* Need to set this flag so that the LIS2/4 registers get set. + * It also means the i915_update_immediate() function must be called + * after this one, in i915_update_derived(). + */ + i915->dirty |= I915_NEW_VERTEX_FORMAT; + + memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); + } +} + + + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void i915_update_derived( struct i915_context *i915 ) +{ + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) + calculate_vertex_layout( i915 ); + + if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) + i915_update_samplers(i915); + + if (i915->dirty & I915_NEW_TEXTURE) + i915_update_textures(i915); + + if (i915->dirty) + i915_update_immediate( i915 ); + + if (i915->dirty) + i915_update_dynamic( i915 ); + + if (i915->dirty & I915_NEW_FS) { + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? 
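+       * Illustrative note: i915_emit_hardware_state() keys both the
+       * shader dwords and the FS constants off I915_HW_PROGRAM, so
+       * flagging it here re-emits both for a newly bound FS.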
*/ + } + + /* HW emit currently references framebuffer state directly: + */ + if (i915->dirty & I915_NEW_FRAMEBUFFER) + i915->hardware_dirty |= I915_HW_STATIC; + + i915->dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c new file mode 100644 index 0000000000..86126a5a15 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_batch.h" +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +/* State that we have chosen to store in the DYNAMIC segment of the + * i915 indirect state mechanism. + * + * Can't cache these in the way we do the static state, as there is no + * start/size in the command packet, instead an 'end' value that gets + * incremented. + * + * Additionally, there seems to be a requirement to re-issue the full + * (active) state every time a 4kb boundary is crossed. 
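+ *
+ * Usage sketch (mirrors the upload_*() atoms below, illustrative only):
+ * each atom packs its dword(s) and stores them at a fixed slot, e.g.
+ *
+ *   unsigned modes4 = ...;
+ *   set_dynamic_indirect(i915, I915_DYNAMIC_MODES4, &modes4, 1);
+ *
+ * which copies the values into i915->current.dynamic[] and flags
+ * I915_HW_DYNAMIC so the whole dynamic block is re-emitted by
+ * i915_emit_hardware_state().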
+ */ + +static INLINE void set_dynamic_indirect( struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords ) +{ + unsigned i; + + for (i = 0; i < dwords; i++) + i915->current.dynamic[offset + i] = src[i]; + + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + +/*********************************************************************** + * Modes4: stencil masks and logicop + */ +static void upload_MODES4( struct i915_context *i915 ) +{ + unsigned modes4 = 0; + + /* I915_NEW_STENCIL */ + modes4 |= i915->depth_stencil->stencil_modes4; + /* I915_NEW_BLEND */ + modes4 |= i915->blend->modes4; + + /* Always, so that we know when state is in-active: + */ + set_dynamic_indirect( i915, + I915_DYNAMIC_MODES4, + &modes4, + 1 ); +} + +const struct i915_tracked_state i915_upload_MODES4 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_MODES4 +}; + + + + +/*********************************************************************** + */ + +static void upload_BFO( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_BFO_0, + &(i915->depth_stencil->bfo[0]), + 2 ); +} + +const struct i915_tracked_state i915_upload_BFO = { + I915_NEW_DEPTH_STENCIL, + upload_BFO +}; + + +/*********************************************************************** + */ + + +static void upload_BLENDCOLOR( struct i915_context *i915 ) +{ + unsigned bc[2]; + + memset( bc, 0, sizeof(bc) ); + + /* I915_NEW_BLEND {_COLOR} + */ + { + const float *color = i915->blend_color.color; + + bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; + bc[1] = pack_ui32_float4( color[0], + color[1], + color[2], + color[3] ); + } + + set_dynamic_indirect( i915, + I915_DYNAMIC_BC_0, + bc, + 2 ); +} + +const struct i915_tracked_state i915_upload_BLENDCOLOR = { + I915_NEW_BLEND, + upload_BLENDCOLOR +}; + +/*********************************************************************** + */ + + +static void upload_IAB( struct i915_context *i915 ) +{ + unsigned iab = i915->blend->iab; + + + set_dynamic_indirect( i915, + I915_DYNAMIC_IAB, + &iab, + 1 ); +} + +const struct i915_tracked_state i915_upload_IAB = { + I915_NEW_BLEND, + upload_IAB +}; + + +/*********************************************************************** + */ + + + +static void upload_DEPTHSCALE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2 ); +} + +const struct i915_tracked_state i915_upload_DEPTHSCALE = { + I915_NEW_RASTERIZER, + upload_DEPTHSCALE +}; + + + +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test. 
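+ *
+ * Packing example (illustrative): upload_STIPPLE() below takes the low
+ * nibble of one byte from each of the first four 32-bit rows of the mask
+ * (bytes 12, 8, 4 and 0) and packs them into the 16-bit ST1 field, so a
+ * 0x55555555 / 0xAAAAAAAA checkerboard comes out as st[1] = 0x5A5A before
+ * ST1_ENABLE from the rasterizer state is OR'ed in.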
+ */ + +static void upload_STIPPLE( struct i915_context *i915 ) +{ + unsigned st[2]; + + st[0] = _3DSTATE_STIPPLE; + st[1] = 0; + + /* I915_NEW_RASTERIZER + */ + st[1] |= i915->rasterizer->st; + + + /* I915_NEW_STIPPLE + */ + { + const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; + ubyte p[4]; + + p[0] = mask[12] & 0xf; + p[1] = mask[8] & 0xf; + p[2] = mask[4] & 0xf; + p[3] = mask[0] & 0xf; + + /* Not sure what to do about fallbacks, so for now just dont: + */ + st[1] |= ((p[0] << 0) | + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); + } + + + set_dynamic_indirect( i915, + I915_DYNAMIC_STP_0, + &st[0], + 2 ); +} + + +const struct i915_tracked_state i915_upload_STIPPLE = { + I915_NEW_RASTERIZER | I915_NEW_STIPPLE, + upload_STIPPLE +}; + + + +/*********************************************************************** + * Scissor. + */ +static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1 ); +} + +const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { + I915_NEW_RASTERIZER, + upload_SCISSOR_ENABLE +}; + + + +static void upload_SCISSOR_RECT( struct i915_context *i915 ) +{ + unsigned x1 = i915->scissor.minx; + unsigned y1 = i915->scissor.miny; + unsigned x2 = i915->scissor.maxx; + unsigned y2 = i915->scissor.maxy; + unsigned sc[3]; + + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; + sc[1] = (y1 << 16) | (x1 & 0xffff); + sc[2] = (y2 << 16) | (x2 & 0xffff); + + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3 ); +} + + +const struct i915_tracked_state i915_upload_SCISSOR_RECT = { + I915_NEW_SCISSOR, + upload_SCISSOR_RECT +}; + + + + + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_MODES4, + &i915_upload_BFO, + &i915_upload_BLENDCOLOR, + &i915_upload_IAB, + &i915_upload_DEPTHSCALE, + &i915_upload_STIPPLE, + &i915_upload_SCISSOR_ENABLE, + &i915_upload_SCISSOR_RECT +}; + +/* These will be dynamic indirect state commands, but for now just end + * up on the batch buffer with everything else. + */ +void i915_update_dynamic( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} + diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c new file mode 100644 index 0000000000..9bd6f92323 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -0,0 +1,402 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +static unsigned translate_format( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_R5G6B5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format( enum pipe_format zformat ) +{ + switch (zformat) { + case PIPE_FORMAT_S8Z24_UNORM: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + + +/** + * Examine framebuffer state to determine width, height. + */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + + +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + /* XXX: there must be an easier way */ + const unsigned dwords = ( 14 + + 7 + + I915_MAX_DYNAMIC + + 8 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_MAX_CONSTANT*4 + +#if 0 + i915->current.program_len + +#else + i915->fs->program_len + +#endif + 6 + ) * 3/2; /* plus 50% margin */ + const unsigned relocs = ( I915_TEX_UNITS + + 3 + ) * 3/2; /* plus 50% margin */ + +#if 0 + debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); +#endif + + if(!BEGIN_BATCH(dwords, relocs)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(dwords, relocs)); + } + + /* 14 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_INVARIENT) + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D); + + /* Need to initialize this to zero. 
+ */ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + + /* disable indirect state for now + */ + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); + OUT_BATCH(0); + } + + /* 7 dwords, 1 relocs */ + if (i915->hardware_dirty & I915_HW_IMMEDIATE) + { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | + I1_LOAD_S(1) | + I1_LOAD_S(2) | + I1_LOAD_S(4) | + I1_LOAD_S(5) | + I1_LOAD_S(6) | + (5)); + + if(i915->vbo) + OUT_RELOC(i915->vbo, + I915_BUFFER_ACCESS_READ, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + /* FIXME: we should not do this */ + OUT_BATCH(0); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + } + + /* I915_MAX_DYNAMIC dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_DYNAMIC) + { + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + OUT_BATCH(i915->current.dynamic[i]); + } + } + + /* 8 dwords, 2 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) + { + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + + if (cbuf_surface) { + unsigned cpitch = cbuf_surface->stride; + unsigned ctile = BUF_3D_USE_FENCE; + if (cbuf_surface->texture && + ((struct i915_texture*)(cbuf_surface->texture))->tiled) { + ctile = BUF_3D_TILED_SURFACE; + } + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(cpitch) | /* pitch in bytes */ + ctile); + + OUT_RELOC(cbuf_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + cbuf_surface->offset); + } + + /* What happens if no zbuf?? 
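+    * As written: the depth BUF_INFO packet is simply skipped and zformat
+    * below stays zero in DST_BUF_VARS (illustrative reading of this code,
+    * not a statement about what the hardware requires).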
+ */ + if (depth_surface) { + unsigned zpitch = depth_surface->stride; + unsigned ztile = BUF_3D_USE_FENCE; + if (depth_surface->texture && + ((struct i915_texture*)(depth_surface->texture))->tiled) { + ztile = BUF_3D_TILED_SURFACE; + } + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_DEPTH | + BUF_3D_PITCH(zpitch) | /* pitch in bytes */ + ztile); + + OUT_RELOC(depth_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + depth_surface->offset); + } + + { + unsigned cformat, zformat = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) + zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat ); + } + } + +#if 01 + /* texture images */ + /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ + if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) + { + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct pipe_buffer *buf = + i915->texture[unit]->buffer; + uint offset = 0; + assert(buf); + + count++; + + OUT_RELOC(buf, + I915_BUFFER_ACCESS_READ, + offset); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } + } +#endif + +#if 01 + /* samplers */ + /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_SAMPLER) + { + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } + } +#endif + + /* constants */ + /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? 
"user" : "immediate")); + } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } + } + + /* Fragment program */ + /* i915->current.program_len dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); + } + } + + /* drawing surface size */ + /* 6 dwords, 0 relocs */ + { + uint w, h; + boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + (void)k; + assert(k); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); + OUT_BATCH(0); + OUT_BATCH(0); + } + + + i915->hardware_dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c new file mode 100644 index 0000000000..8c16bb4e27 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -0,0 +1,225 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "util/u_memory.h" + + +/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. + * Would like to opportunistically recombine all these fragments into + * a single packet containing only what has changed, but for now emit + * as multiple packets. + */ + + + + +/*********************************************************************** + * S0,S1: Vertex buffer state. + */ +static void upload_S0S1(struct i915_context *i915) +{ + unsigned LIS0, LIS1; + + /* INTEL_NEW_VBO */ + /* TODO: re-use vertex buffers here? */ + LIS0 = i915->vbo_offset; + + /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! 
+ */ + { + unsigned vertex_size = i915->current.vertex_info.size; + + LIS1 = ((vertex_size << 24) | + (vertex_size << 16)); + } + + /* INTEL_NEW_VBO */ + /* TODO: use a vertex generation number to track vbo changes */ + if (1 || + i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + { + i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; + i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S0S1 = { + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, + upload_S0S1 +}; + + + + +/*********************************************************************** + * S4: Vertex format, rasterization state + */ +static void upload_S2S4(struct i915_context *i915) +{ + unsigned LIS2, LIS4; + + /* I915_NEW_VERTEX_FORMAT */ + { + LIS2 = i915->current.vertex_info.hwfmt[1]; + LIS4 = i915->current.vertex_info.hwfmt[0]; + /* + debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); + */ + assert(LIS4); /* should never be zero? */ + } + + LIS4 |= i915->rasterizer->LIS4; + + if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || + LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + + i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; + i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + + +const struct i915_tracked_state i915_upload_S2S4 = { + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, + upload_S2S4 +}; + + + +/*********************************************************************** + * + */ +static void upload_S5( struct i915_context *i915 ) +{ + unsigned LIS5 = 0; + + LIS5 |= i915->depth_stencil->stencil_LIS5; + + LIS5 |= i915->blend->LIS5; + +#if 0 + /* I915_NEW_RASTERIZER */ + if (i915->state.Polygon->OffsetFill) { + LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; + } +#endif + + + if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { + i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S5 = { + (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), + upload_S5 +}; + + +/*********************************************************************** + */ +static void upload_S6( struct i915_context *i915 ) +{ + unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); + + /* I915_NEW_FRAMEBUFFER + */ + if (i915->framebuffer.cbufs[0]) + LIS6 |= S6_COLOR_WRITE_ENABLE; + + /* I915_NEW_BLEND + */ + LIS6 |= i915->blend->LIS6; + + /* I915_NEW_DEPTH + */ + LIS6 |= i915->depth_stencil->depth_LIS6; + + if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { + i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S6 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, + upload_S6 +}; + + +/*********************************************************************** + */ +static void upload_S7( struct i915_context *i915 ) +{ + unsigned LIS7; + + /* I915_NEW_RASTERIZER + */ + LIS7 = i915->rasterizer->LIS7; + + if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { + i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S7 = { + I915_NEW_RASTERIZER, + upload_S7 +}; + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_S0S1, + &i915_upload_S2S4, + &i915_upload_S5, + &i915_upload_S6, + &i915_upload_S7 +}; + +/* + */ +void 
i915_update_immediate( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h new file mode 100644 index 0000000000..378de8f9c4 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_STATE_INLINES_H +#define I915_STATE_INLINES_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "i915_reg.h" + + +static INLINE unsigned +i915_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_NEVER; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LESS; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_ALWAYS; + default: + return COMPAREFUNC_ALWAYS; + } +} + +static INLINE unsigned +i915_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACT_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BLENDFACT_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case 
PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACT_DST_COLR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACT_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BLENDFACT_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + default: + return BLENDFACT_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BLENDFUNC_ADD; + case PIPE_BLEND_MIN: + return BLENDFUNC_MIN; + case PIPE_BLEND_MAX: + return BLENDFUNC_MAX; + case PIPE_BLEND_SUBTRACT: + return BLENDFUNC_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDFUNC_REVERSE_SUBTRACT; + default: + return 0; + } +} + + +static INLINE unsigned +i915_translate_logic_op(unsigned opcode) +{ + switch (opcode) { + case PIPE_LOGICOP_CLEAR: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_AND: + return LOGICOP_AND; + case PIPE_LOGICOP_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case PIPE_LOGICOP_COPY: + return LOGICOP_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return LOGICOP_COPY_INV; + case PIPE_LOGICOP_AND_INVERTED: + return LOGICOP_AND_INV; + case PIPE_LOGICOP_NOOP: + return LOGICOP_NOOP; + case PIPE_LOGICOP_XOR: + return LOGICOP_XOR; + case PIPE_LOGICOP_OR: + return LOGICOP_OR; + case PIPE_LOGICOP_OR_INVERTED: + return LOGICOP_OR_INV; + case PIPE_LOGICOP_NOR: + return LOGICOP_NOR; + case PIPE_LOGICOP_EQUIV: + return LOGICOP_EQUIV; + case PIPE_LOGICOP_INVERT: + return LOGICOP_INV; + case PIPE_LOGICOP_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case PIPE_LOGICOP_NAND: + return LOGICOP_NAND; + case PIPE_LOGICOP_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} + + + +static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) +{ + boolean ok; + + switch (hw_prim) { + case PRIM3D_POINTLIST: + ok = (nr >= 1); + assert(ok); + break; + case PRIM3D_LINELIST: + ok = (nr >= 2) && (nr % 2) == 0; + assert(ok); + break; + case PRIM3D_LINESTRIP: + ok = (nr >= 2); + assert(ok); + break; + case PRIM3D_TRILIST: + ok = (nr >= 3) && (nr % 3) == 0; + assert(ok); + break; + case PRIM3D_TRISTRIP: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_TRIFAN: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_POLY: + ok = (nr >= 3); + assert(ok); + break; + default: + assert(0); + ok = 0; + break; + } + + return ok; +} + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c new file mode 100644 index 0000000000..c09c10601b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "i915_state_inlines.h"
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_state.h"
+
+
+/*
+ * A note about min_lod & max_lod.
+ *
+ * There is a circular dependency between the sampler state
+ * and the map state to be submitted to hw.
+ *
+ * Two conditions must be met:
+ * min_lod <= max_lod
+ * max_lod <= last_level
+ *
+ * This would all be fine and dandy if it weren't for the fact that max_lod
+ * is set on the map state instead of the sampler state. That is,
+ * the max_lod we submit on the map is:
+ * max_lod = MIN2(last_level, max_lod);
+ *
+ * So we need to update the map state when we change samplers, and
+ * we need to update the sampler state when the map state is changed.
+ * The first part is done by calling i915_update_texture in
+ * i915_update_samplers; the second part is done elsewhere in the
+ * code tracking the state changes.
+ */
+
+static void
+i915_update_texture(struct i915_context *i915,
+ uint unit,
+ const struct i915_texture *tex,
+ const struct i915_sampler_state *sampler,
+ uint state[6]);
+/**
+ * Compute i915 texture sampling state.
+ *
+ * Recalculate all state from scratch. Perhaps not the most
+ * efficient, but this has gotten complex enough that we need
+ * something which is understandable and reliable.
+ * \param state returns the 3 words of computed state
+ */
+static void update_sampler(struct i915_context *i915,
+ uint unit,
+ const struct i915_sampler_state *sampler,
+ const struct i915_texture *tex,
+ unsigned state[3] )
+{
+ const struct pipe_texture *pt = &tex->base;
+ unsigned minlod, lastlod;
+
+ /* Need to do this after updating the maps, which call the
+ * intel_finalize_mipmap_tree and hence can update firstLevel:
+ */
+ state[0] = sampler->state[0];
+ state[1] = sampler->state[1];
+ state[2] = sampler->state[2];
+
+ if (pt->format == PIPE_FORMAT_YCBCR ||
+ pt->format == PIPE_FORMAT_YCBCR_REV)
+ state[0] |= SS2_COLORSPACE_CONVERSION;
+
+ /* 3D textures don't seem to respect the border color.
+ * Fallback if there's ever a danger that they might refer to
+ * it.
+ *
+ * Effectively this means fallback on 3D clamp or
+ * clamp_to_border.
+ *
+ * XXX: Check if this is true on i945.
+ * XXX: Check if this bug got fixed in release silicon. + */ +#if 0 + { + const unsigned ws = sampler->templ->wrap_s; + const unsigned wt = sampler->templ->wrap_t; + const unsigned wr = sampler->templ->wrap_r; + if (pt->target == PIPE_TEXTURE_3D && + (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST || + sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) && + (ws == PIPE_TEX_WRAP_CLAMP || + wt == PIPE_TEX_WRAP_CLAMP || + wr == PIPE_TEX_WRAP_CLAMP || + ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { + if (i915->strict_conformance) { + assert(0); + /* sampler->fallback = true; */ + /* TODO */ + } + } + } +#endif + + /* See note at the top of file */ + minlod = sampler->minlod; + lastlod = pt->last_level << 4; + + if (lastlod < minlod) { + minlod = lastlod; + } + + state[1] |= (sampler->minlod << SS3_MIN_LOD_SHIFT); + state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); +} + + +void i915_update_samplers( struct i915_context *i915 ) +{ + uint unit; + + i915->current.sampler_enable_nr = 0; + i915->current.sampler_enable_flags = 0x0; + + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + update_sampler( i915, + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); + } + } + + i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; +} + + +static uint +translate_texture_format(enum pipe_format pipeFormat) +{ + switch (pipeFormat) { + case PIPE_FORMAT_L8_UNORM: + return MAPSURF_8BIT | MT_8BIT_L8; + case PIPE_FORMAT_I8_UNORM: + return MAPSURF_8BIT | MT_8BIT_I8; + case PIPE_FORMAT_A8_UNORM: + return MAPSURF_8BIT | MT_8BIT_A8; + case PIPE_FORMAT_A8L8_UNORM: + return MAPSURF_16BIT | MT_16BIT_AY88; + case PIPE_FORMAT_R5G6B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_RGB565; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case PIPE_FORMAT_YCBCR_REV: + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); + case PIPE_FORMAT_YCBCR: + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); +#endif + case PIPE_FORMAT_Z16_UNORM: + return (MAPSURF_16BIT | MT_16BIT_L16); +#if 0 + case PIPE_FORMAT_RGBA_DXT1: + case PIPE_FORMAT_RGB_DXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); + case PIPE_FORMAT_RGBA_DXT3: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); + case PIPE_FORMAT_RGBA_DXT5: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); +#endif + case PIPE_FORMAT_S8Z24_UNORM: + return (MAPSURF_32BIT | MT_32BIT_xI824); + default: + debug_printf("i915: translate_texture_format() bad image format %x\n", + pipeFormat); + assert(0); + return 0; + } +} + + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]) +{ + const struct 
pipe_texture *pt = &tex->base; + uint format, pitch; + const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint num_levels = pt->last_level; + unsigned max_lod = num_levels * 4; + unsigned tiled = MS3_USE_FENCE_REGS; + + assert(tex); + assert(width); + assert(height); + assert(depth); + + format = translate_texture_format(pt->format); + pitch = tex->stride; + + assert(format); + assert(pitch); + + if (tex->tiled) { + assert(!((pitch - 1) & pitch)); + tiled = MS3_TILED_SURFACE; + } + + /* MS3 state */ + state[0] = + (((height - 1) << MS3_HEIGHT_SHIFT) + | ((width - 1) << MS3_WIDTH_SHIFT) + | format + | tiled); + + /* + * XXX When min_filter != mag_filter and there's just one mipmap level, + * set max_lod = 1 to make sure i915 chooses between min/mag filtering. + */ + + /* See note at the top of file */ + if (max_lod > (sampler->maxlod >> 2)) + max_lod = sampler->maxlod >> 2; + + /* MS4 state */ + state[1] = + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) + | MS4_CUBE_FACE_ENA_MASK + | ((max_lod) << MS4_MAX_LOD_SHIFT) + | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); +} + + +void +i915_update_textures(struct i915_context *i915) +{ + uint unit; + + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + } + } + + i915->hardware_dirty |= I915_HW_MAP; +} diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c new file mode 100644 index 0000000000..62f1926644 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+#include "i915_context.h"
+#include "i915_blit.h"
+#include "i915_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_tile.h"
+#include "util/u_rect.h"
+
+
+/* Assumes all values are within bounds -- no checking at this level -
+ * do it higher up if required.
+ */
+static void
+i915_surface_copy(struct pipe_context *pipe,
+ boolean do_flip,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ struct pipe_surface *src,
+ unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+ assert( dst != src );
+ assert( dst->block.size == src->block.size );
+ assert( dst->block.width == src->block.width );
+ assert( dst->block.height == src->block.height );
+
+ if (0) {
+ void *dst_map = pipe->screen->surface_map( pipe->screen,
+ dst,
+ PIPE_BUFFER_USAGE_CPU_WRITE );
+
+ const void *src_map = pipe->screen->surface_map( pipe->screen,
+ src,
+ PIPE_BUFFER_USAGE_CPU_READ );
+
+ pipe_copy_rect(dst_map,
+ &dst->block,
+ dst->stride,
+ dstx, dsty,
+ width, height,
+ src_map,
+ do_flip ? -(int) src->stride : src->stride,
+ srcx, do_flip ? height - 1 - srcy : srcy);
+
+ pipe->screen->surface_unmap(pipe->screen, src);
+ pipe->screen->surface_unmap(pipe->screen, dst);
+ }
+ else {
+ assert(dst->block.width == 1);
+ assert(dst->block.height == 1);
+ i915_copy_blit( i915_context(pipe),
+ do_flip,
+ dst->block.size,
+ (short) src->stride, src->buffer, src->offset,
+ (short) dst->stride, dst->buffer, dst->offset,
+ (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
+ }
+}
+
+
+static void
+i915_surface_fill(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height, unsigned value)
+{
+ if (0) {
+ void *dst_map = pipe->screen->surface_map( pipe->screen,
+ dst,
+ PIPE_BUFFER_USAGE_CPU_WRITE );
+
+ pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
+
+ pipe->screen->surface_unmap(pipe->screen, dst);
+ }
+ else {
+ assert(dst->block.width == 1);
+ assert(dst->block.height == 1);
+ i915_fill_blit( i915_context(pipe),
+ dst->block.size,
+ (short) dst->stride,
+ dst->buffer, dst->offset,
+ (short) dstx, (short) dsty,
+ (short) width, (short) height,
+ value );
+ }
+}
+
+
+void
+i915_init_surface_functions(struct i915_context *i915)
+{
+ i915->pipe.surface_copy = i915_surface_copy;
+ i915->pipe.surface_fill = i915_surface_fill;
+}
diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c
new file mode 100644
index 0000000000..2f5459af67
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_texture.c
@@ -0,0 +1,775 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_texture.h" +#include "i915_debug.h" +#include "i915_screen.h" + +/* + * Helper function and arrays + */ + +/** + * Initial offset for Cube map. + */ +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +/** + * Step offsets for Cube map. 
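+ * Like initial_offsets above, these are in units of blocks; each step is
+ * scaled by the face size at the current mipmap level (halved per level)
+ * in the cube layout code below.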
+ */ +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + +static unsigned +power_of_two(unsigned x) +{ + unsigned value = 1; + while (value < x) + value = value << 1; + return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +/* + * More advanced helper funcs + */ + + +static void +i915_miptree_set_level_info(struct i915_texture *tex, + unsigned level, + unsigned nr_images, + unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); + + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + +static void +i915_miptree_set_image_offset(struct i915_texture *tex, + unsigned level, unsigned img, unsigned x, unsigned y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; + + /* + printf("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); + */ +} + + +/* + * Layout functions + */ + + +/** + * Special case to deal with display targets. + */ +static boolean +i915_displaytarget_layout(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->block.size != 4) + return 0; + + i915_miptree_set_level_info( tex, 0, 1, + tex->base.width[0], + tex->base.height[0], + 1 ); + i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); + + if (tex->base.width[0] >= 128) { + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); +#if 0 /* used for tiled display targets */ + tex->tiled = 1; +#endif + } else { + tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64); + tex->total_nblocksy = tex->base.nblocksy[0]; + } + + /* + printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); + */ + + return 1; +} + +static void +i945_miptree_layout_2d( struct i915_texture *tex ) +{ + struct pipe_texture *pt = &tex->base; + const int align_x = 2, align_y = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + + /* used for tiled display targets */ + if (0) + if (i915_displaytarget_layout(tex)) + return; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap level. 
This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap level out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_nblocksx + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); + + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; + } + + /* Pitch must be a whole number of dwords + */ + tex->stride = align(tex->stride, 64); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, x, y); + + nblocksy = align(nblocksy, align_y); + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); + + /* Layout_below: step right after second mipmap level. + */ + if (level == 1) { + x += align(nblocksx, align_x); + } + else { + y += nblocksy; + } + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } +} + +static void +i945_miptree_layout_cube(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + const unsigned nblocks = pt->nblocksx[0]; + unsigned face; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + /* + printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + */ + + assert(width == height); /* cubemap images are square */ + + /* + * XXX Should only be used for compressed formats. But lets + * keep this code active just in case. + * + * Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (nblocks > 32) + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + else + tex->stride = 14 * 8 * pt->block.size; + + tex->total_nblocksy = nblocks * 4; + + /* Set all the levels to effectively occupy the whole rectangular region. 
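+ * All six faces of every level are packed into the single
+ * stride x total_nblocksy rectangle computed above; the per-face loop
+ * further down walks the image offsets through that rectangle.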
+ */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, width, height, 1); + width /= 2; + height /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; + +#if 0 /* Fix and enable this code for compressed formats */ + if (nblocks == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (nblocks < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } +#endif + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + +#if 0 /* Fix and enable this code for compressed formats */ + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + default: +#endif + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; +#if 0 + break; + } +#endif + } + } +} + +static boolean +i915_miptree_layout(struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: { + const unsigned nblocks = pt->nblocksx[0]; + unsigned face; + unsigned width = pt->width[0], height = pt->height[0]; + + assert(width == height); /* cubemap images are square */ + + /* double pitch for cube layouts */ + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->total_nblocksy = nblocks * 4; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, + width, height, + 1); + width /= 2; + height /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + d >>= 1; + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + } + } + break; + } + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned stack_nblocksy = 0; + + /* Calculate the size of a single slice. + */ + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + + /* XXX: hardware expects/requires 9 levels at minimum. + */ + for (level = 0; level <= MAX2(8, pt->last_level); + level++) { + i915_miptree_set_level_info(tex, level, depth, + width, height, depth); + + + stack_nblocksy += MAX2(2, nblocksy); + + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + + /* Fixup depth image_offsets: + */ + depth = pt->depth[0]; + for (level = 0; level <= pt->last_level; level++) { + unsigned i; + for (i = 0; i < depth; i++) + i915_miptree_set_image_offset(tex, level, i, + 0, i * stack_nblocksy); + + depth = minify(depth); + } + + + /* Multiply slice size by texture depth for total size. 
It's + * remarkable how wasteful of memory the i915 texture layouts + * are. They are largely fixed in the i945. + */ + tex->total_nblocksy = stack_nblocksy * pt->depth[0]; + break; + } + + default:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, + width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, + 0, tex->total_nblocksy); + + nblocksy = round_up(MAX2(2, nblocksy), 2); + + tex->total_nblocksy += nblocksy; + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + break; + } + } + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); + */ + + return TRUE; +} + + +static boolean +i945_miptree_layout(struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + i945_miptree_layout_cube(tex); + break; + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; + + pack_y_pitch = MAX2(pt->nblocksy[0], 2); + pack_x_pitch = tex->stride / pt->block.size; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? 
depth : 6; + int x = 0; + int y = 0; + unsigned q, j; + + i915_miptree_set_level_info(tex, level, nr_images, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_nblocksy += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + break; + } + + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: +// case PIPE_TEXTURE_RECTANGLE: + i945_miptree_layout_2d(tex); + break; + default: + assert(0); + return FALSE; + } + + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); + */ + + return TRUE; +} + + +static struct pipe_texture * +i915_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct i915_screen *i915screen = i915_screen(screen); + struct pipe_winsys *ws = screen->winsys; + struct i915_texture *tex = CALLOC_STRUCT(i915_texture); + size_t tex_size; + + if (!tex) + return NULL; + + tex->base = *templat; + tex->base.refcount = 1; + tex->base.screen = screen; + + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + + if (i915screen->is_i945) { + if (!i945_miptree_layout(tex)) + goto fail; + } else { + if (!i915_miptree_layout(tex)) + goto fail; + } + + tex_size = tex->stride * tex->total_nblocksy; + + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex_size); + + if (!tex->buffer) + goto fail; + +#if 0 + void *ptr = ws->buffer_map(ws, tex->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + memset(ptr, 0x80, tex_size); + ws->buffer_unmap(ws, tex->buffer); +#endif + + return &tex->base; + +fail: + FREE(tex); + return NULL; +} + + +static void +i915_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct i915_texture *tex = (struct i915_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + pipe_buffer_reference(screen, &tex->buffer, NULL); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + FREE(tex->image_offset[i]); + + FREE(tex); + } + *pt = NULL; +} + +static struct pipe_surface * +i915_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } + else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + ps = CALLOC_STRUCT(pipe_surface); + if (ps) { + ps->refcount = 1; + ps->winsys = ws; + 
pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(screen, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; + ps->offset = offset; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + } + return ps; +} + +static struct pipe_texture * +i915_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct i915_texture *tex; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + tex = CALLOC_STRUCT(i915_texture); + if (!tex) + return NULL; + + tex->base = *base; + tex->base.refcount = 1; + tex->base.screen = screen; + + tex->stride = stride[0]; + + i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + pipe_buffer_reference(screen, &tex->buffer, buffer); + + return &tex->base; +} + +void +i915_init_texture_functions(struct i915_context *i915) +{ +// i915->pipe.texture_update = i915_texture_update; +} + +static void +i915_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **surface) +{ + struct pipe_surface *surf = *surface; + + if (--surf->refcount == 0) { + + /* This really should not be possible, but it's actually + * happening quite a bit... Will fix. + */ + if (surf->status == PIPE_SURFACE_STATUS_CLEAR) { + debug_printf("XXX destroying a surface with pending clears...\n"); + assert(0); + } + + pipe_texture_reference(&surf->texture, NULL); + pipe_buffer_reference(screen, &surf->buffer, NULL); + FREE(surf); + } + + *surface = NULL; +} + +void +i915_init_screen_texture_functions(struct pipe_screen *screen) +{ + screen->texture_create = i915_texture_create; + screen->texture_release = i915_texture_release; + screen->get_tex_surface = i915_get_tex_surface; + screen->texture_blanket = i915_texture_blanket; + screen->tex_surface_release = i915_tex_surface_release; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h new file mode 100644 index 0000000000..7225016a9f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_TEXTURE_H +#define I915_TEXTURE_H + +struct i915_context; +struct pipe_screen; + + +extern void +i915_init_texture_functions(struct i915_context *i915); + + +extern void +i915_init_screen_texture_functions(struct pipe_screen *screen); + + +#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h new file mode 100644 index 0000000000..81904c2a74 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i915simple requires any window system + * hosting it to implement. This is the only include file in i915simple + * which is public. + * + */ + +#ifndef I915_WINSYS_H +#define I915_WINSYS_H + + +#include "pipe/p_defines.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct i915_batchbuffer; +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; +struct pipe_screen; + + +/** + * Additional winsys interface for i915simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. 
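+ *
+ * A rough usage sketch from the driver side (illustrative only; the buffer
+ * and fence variables are hypothetical, but batch_get, batch_reloc and
+ * batch_flush are the hooks declared below):
+ *
+ *   struct i915_batchbuffer *batch = sws->batch_get(sws);
+ *   ... write command dwords into the batch ...
+ *   sws->batch_reloc(sws, vertex_buf, I915_BUFFER_ACCESS_READ, 0);
+ *   sws->batch_flush(sws, &fence);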
+ */ +struct i915_winsys { + + void (*destroy)( struct i915_winsys *sws ); + + /** + * Get the current batch buffer from the winsys. + */ + struct i915_batchbuffer *(*batch_get)( struct i915_winsys *sws ); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)( struct i915_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ); + + /** + * Flush the batch. + */ + void (*batch_flush)( struct i915_winsys *sws, + struct pipe_fence_handle **fence ); +}; + +#define I915_BUFFER_ACCESS_WRITE 0x1 +#define I915_BUFFER_ACCESS_READ 0x2 + +#define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *i915_create_context( struct pipe_screen *, + struct pipe_winsys *, + struct i915_winsys * ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile new file mode 100644 index 0000000000..e97146e57c --- /dev/null +++ b/src/gallium/drivers/i965simple/Makefile @@ -0,0 +1,54 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i965simple + +C_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_screen.c \ + brw_surface.c \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_pool.c \ + brw_state_upload.c \ + brw_tex_layout.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_wm.c \ + brw_wm_iz.c \ + brw_wm_decl.c \ + brw_wm_glsl.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript new file mode 100644 index 0000000000..43fc2a4005 --- /dev/null +++ b/src/gallium/drivers/i965simple/SConscript @@ -0,0 +1,54 @@ +Import('*') + +env = env.Clone() + +i965simple = env.ConvenienceLibrary( + target = 'i965simple', + source = [ + 'brw_blit.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_flush.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_screen.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', + 'brw_state_pool.c', + 'brw_state_upload.c', + 'brw_surface.c', + 'brw_tex_layout.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_wm.c', + 'brw_wm_decl.c', + 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + ]) + +Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_batch.h 
b/src/gallium/drivers/i965simple/brw_batch.h new file mode 100644 index 0000000000..5f5932a488 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_batch.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_BATCH_H +#define BRW_BATCH_H + +#include "brw_winsys.h" + +#define BATCH_LOCALS + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS 0x2 + +#define BEGIN_BATCH( dwords, relocs ) \ + brw->winsys->batch_start(brw->winsys, dwords, relocs) + +#define OUT_BATCH( dword ) \ + brw->winsys->batch_dword(brw->winsys, dword) + +#define OUT_RELOC( buf, flags, delta ) \ + brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) + +#define ADVANCE_BATCH() \ + brw->winsys->batch_end( brw->winsys ) + +/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. + */ +#define FLUSH_BATCH(fence) do { \ + brw->winsys->batch_flush(brw->winsys, fence); \ + brw->hardware_dirty = ~0; \ +} while (0) + +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) + +#endif diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c new file mode 100644 index 0000000000..8494f70493 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <errno.h> + +#include "brw_batch.h" +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void brw_fill_blit(struct brw_context *brw, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (0xF0 << 16) | (1<<24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1<<24) | (1<<25); + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (y << 16) | x ); + OUT_BATCH( ((y+h) << 16) | (x+w) ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); + OUT_BATCH( color ); + ADVANCE_BATCH(); +} + +static unsigned translate_raster_op(unsigned logicop) +{ + switch(logicop) { + case PIPE_LOGICOP_CLEAR: return 0x00; + case PIPE_LOGICOP_AND: return 0x88; + case PIPE_LOGICOP_AND_REVERSE: return 0x44; + case PIPE_LOGICOP_COPY: return 0xCC; + case PIPE_LOGICOP_AND_INVERTED: return 0x22; + case PIPE_LOGICOP_NOOP: return 0xAA; + case PIPE_LOGICOP_XOR: return 0x66; + case PIPE_LOGICOP_OR: return 0xEE; + case PIPE_LOGICOP_NOR: return 0x11; + case PIPE_LOGICOP_EQUIV: return 0x99; + case PIPE_LOGICOP_INVERT: return 0x55; + case PIPE_LOGICOP_OR_REVERSE: return 0xDD; + case PIPE_LOGICOP_COPY_INVERTED: return 0x33; + case PIPE_LOGICOP_OR_INVERTED: return 0xBB; + case PIPE_LOGICOP_NAND: return 0x77; + case PIPE_LOGICOP_SET: return 0xFF; + default: return 0; + } +} + + +/* Copy BitBlt + */ +void brw_copy_blit(struct brw_context *brw, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_x, src_y, + dst_buffer, dst_pitch, dst_x, dst_y, + w,h,logic_op); + + assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); + assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); + + src_pitch *= cpp; + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | + (1<<25); + CMD = XY_SRC_COPY_BLT_CMD | 
XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (src_tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + dst_pitch &= 0xffff; + src_pitch &= 0xffff; + + /* Initial y values don't seem to work with negative pitches. If + * we adjust the offsets manually (below), it seems to work fine. + * + * On the other hand, if we always adjust, the hardware doesn't + * know which blit directions to use, so overlapping copypixels get + * the wrong result. + */ + if (dst_pitch > 0 && src_pitch > 0) { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (dst_y << 16) | dst_x ); + OUT_BATCH( (dst_y2 << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset ); + OUT_BATCH( (src_y << 16) | src_x ); + OUT_BATCH( src_pitch ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset ); + ADVANCE_BATCH(); + } + else { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); + OUT_BATCH( (0 << 16) | dst_x ); + OUT_BATCH( (h << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset + dst_y * dst_pitch ); + OUT_BATCH( (src_pitch & 0xffff) ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset + src_y * src_pitch ); + ADVANCE_BATCH(); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h new file mode 100644 index 0000000000..111c5d91d3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.h @@ -0,0 +1,33 @@ +#ifndef BRW_BLIT_H +#define BRW_BLIT_H + +#include "pipe/p_compiler.h" + +struct pipe_buffer; +struct brw_context; + +void brw_fill_blit(struct brw_context *intel, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color); +void brw_copy_blit(struct brw_context *intel, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op); +#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c new file mode 100644 index 0000000000..79d4150383 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -0,0 +1,269 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" + + +static int brw_translate_compare_func(int func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return BRW_COMPAREFUNCTION_ALWAYS; +} + +static int brw_translate_stencil_op(int op) +{ + switch(op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + return BRW_STENCILOP_ZERO; + } +} + + +static int brw_translate_logic_op(int opcode) +{ + switch(opcode) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + return BRW_LOGICOPFUNCTION_SET; + } +} + + +static void upload_cc_vp( struct brw_context *brw ) +{ + struct brw_cc_viewport ccv; + + memset(&ccv, 0, sizeof(ccv)); + + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + + brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { 
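+      /* The CC viewport holds the fixed 0..1 depth range; it is re-uploaded whenever the scene changes. */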
+ .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_cc_vp +}; + + +static void upload_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_state cc; + + memset(&cc, 0, sizeof(cc)); + + /* BRW_NEW_DEPTH_STENCIL */ + if (brw->attribs.DepthStencil->stencil[0].enabled) { + cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; + cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); + cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); + cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zfail_op); + cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zpass_op); + cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; + cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].write_mask; + cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask; + + if (brw->attribs.DepthStencil->stencil[1].enabled) { + cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; + cc.cc0.bf_stencil_func = brw_translate_compare_func( + brw->attribs.DepthStencil->stencil[1].func); + cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].fail_op); + cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zfail_op); + cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zpass_op); + cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; + cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask; + cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask; + } + + /* Not really sure about this: + */ + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + cc.cc0.stencil_write_enable = 1; + } + + /* BRW_NEW_BLEND */ + if (brw->attribs.Blend->logicop_enable) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); + } + else if (brw->attribs.Blend->blend_enable) { + int eqRGB = brw->attribs.Blend->rgb_func; + int eqA = brw->attribs.Blend->alpha_func; + int srcRGB = brw->attribs.Blend->rgb_src_factor; + int dstRGB = brw->attribs.Blend->rgb_dst_factor; + int srcA = brw->attribs.Blend->alpha_src_factor; + int dstA = brw->attribs.Blend->alpha_dst_factor; + + if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { + srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; + } + + if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { + srcA = dstA = PIPE_BLENDFACTOR_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + /* BRW_NEW_ALPHATEST + */ + if (brw->attribs.DepthStencil->alpha.enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = + brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); + + cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref); + + 
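+      /* The alpha reference was packed as an unsigned byte above, hence the UNORM8 test format below. */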
cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + } + + if (brw->attribs.Blend->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + + if (brw->attribs.DepthStencil->depth.enabled) { + cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; + cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); + cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; + } + + /* CACHE_NEW_CC_VP */ + cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; + + if (BRW_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; + + brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); +} + +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, + .cache = CACHE_NEW_CC_VP + }, + .update = upload_cc_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c new file mode 100644 index 0000000000..268124cc53 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.c @@ -0,0 +1,206 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + + +static void compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + struct brw_clip_compile c; + const unsigned *program; + unsigned program_size; + unsigned delta; + unsigned i; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + c.key = *key; + + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) + if (c.key.attrs & (1<<i)) { + c.offset[i] = delta; + delta += ATTR_SIZE; + } + + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? 
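+ * (two vec4 attributes per 32-byte GRF, plus one register for the vertex header)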
*/ + c.nr_bytes = c.nr_regs * REG_SIZE; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case PIPE_PRIM_TRIANGLES: +#if 0 + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else +#endif + brw_emit_tri_clip( &c ); + break; + case PIPE_PRIM_LINES: + brw_emit_line_clip( &c ); + break; + case PIPE_PRIM_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->clip.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_CLIP_PROG], + key, sizeof(*key), + &brw->clip.prog_data, + &brw->clip.prog_gs_offset); +} + + + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_clip_prog(struct brw_context *brw) +{ + struct brw_clip_prog_key key; + + memset(&key, 0, sizeof(key)); + + /* Populate the key: + */ + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->reduced_primitive; + /* CACHE_NEW_VS_PROG */ + key.attrs = brw->vs.prog_data->outputs_written; + /* BRW_NEW_RASTER */ + key.do_flat_shading = (brw->attribs.Raster->flatshade); + /* BRW_NEW_CLIP */ + key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ + +#if 0 + key.clip_mode = BRW_CLIPMODE_NORMAL; + + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) + key.do_unfilled = 1; + + /* Most cases the fixed function units will handle. Cases where + * one or more polygon faces are unfilled will require help: + */ + if (key.do_unfilled) { + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + + if (brw->attribs.Raster->offset_cw || + brw->attribs.Raster->offset_ccw) { + key.offset_units = brw->attribs.Raster->offset_units; + key.offset_factor = brw->attribs.Raster->offset_scale; + } + key.fill_ccw = brw->attribs.Raster->fill_ccw; + key.fill_cw = brw->attribs.Raster->fill_cw; + key.offset_ccw = brw->attribs.Raster->offset_ccw; + key.offset_cw = brw->attribs.Raster->offset_cw; + if (brw->attribs.Raster->light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + } + } + } +#else + key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; +#endif + + if (!search_cache(brw, &key)) + compile_clip_prog( brw, &key ); +} + +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_CLIP | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h new file mode 100644 index 0000000000..d70fc094ff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -0,0 +1,170 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned nr_userclip:3; + unsigned do_flat_shading:1; + unsigned do_unfilled:1; + unsigned fill_cw:2; /* includes cull information */ + unsigned fill_ccw:2; /* includes cull information */ + unsigned offset_cw:1; + unsigned offset_ccw:1; + unsigned pad0:17; + + unsigned copy_bfc_cw:1; + unsigned copy_bfc_ccw:1; + unsigned clip_mode:3; + unsigned pad1:27; + + float offset_factor; + float offset_units; +}; + + +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 + + +#define PRIM_MASK (0x1f) + +struct brw_clip_compile { + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; + + unsigned first_tmp; + unsigned last_tmp; + + boolean need_direction; + + unsigned last_mrf; + + unsigned header_position_offset; + unsigned offset[PIPE_MAX_ATTRIBS]; +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need for a clip routine, however it + * works out easier to have a dummy one. 
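+ * (brw_emit_point_clip() just allocates registers and sends an empty message to kill the thread.)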
+ */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c new file mode 100644 index 0000000000..75d9e5fcda --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_line.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following the following algorithm: + * + * for (p=0;p<MAX_PLANES;p++) { + * if (clipmask & (1 << p)) { + * float dp0 = DOTPROD( vtx0, plane[p] ); + * float dp1 = DOTPROD( vtx1, plane[p] ); + * + * if (IS_NEGATIVE(dp1)) { + * float t = dp1 / (dp1 - dp0); + * if (t > t1) t1 = t; + * } else { + * float t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *is_negative; + struct brw_instruction *is_neg2; + struct brw_instruction *not_culled; + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + /* -ve rhw workaround */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, v1_null_ud, c->reg.planemask, 
brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + +#if 0 + /* dp = DP4(vtx->position, plane) + */ + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + + /* if (IS_NEGATIVE(dp1)) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +#else + #warning "disabled" +#endif + is_negative = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + is_negative = brw_ELSE(p, is_negative); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_neg2); + } + brw_ENDIF(p, is_negative); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); + + brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + not_culled = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); + + brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, not_culled); + brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ + brw_clip_line_alloc_regs(c); + + if (c->key.do_flat_shading) + brw_clip_copy_colors(c, 0, 1); + + clip_and_emit_line(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c new file mode 100644 index 0000000000..6fce7210d1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_point.c @@ -0,0 +1,47 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c new file mode 100644 index 0000000000..8e78dd51be --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -0,0 +1,93 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +static void upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_state clip; + + memset(&clip, 0, sizeof(clip)); + + /* CACHE_NEW_CLIP_PROG */ + clip.thread0.grf_reg_count = + align(brw->clip.prog_data->total_grf, 16) / 16 - 1; + clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; + clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; + clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + + /* BRW_NEW_URB_FENCE */ + clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + clip.thread4.max_threads = 1; /* 2 threads */ + + if (BRW_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + /* CONSTANT */ + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip6.clipper_viewport_state_ptr = 0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); +} + + +const struct brw_tracked_state brw_clip_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG + }, + .update = upload_clip_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c new file mode 100644 index 0000000000..c5da7b825e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_tri.c @@ -0,0 +1,566 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + if (c->nr_attrs & 1) { + for (j = 0; j < 3; j++) { + unsigned delta = c->nr_attrs*16 + 32; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + struct brw_instruction *is_rev; + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes? 
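+    * (For _3DPRIM_TRISTRIP_REVERSE the first two vertex pointers are swapped and, if a direction is needed, its sign is flipped.)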
+ */ + is_rev = brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + is_rev = brw_ELSE(p, is_rev); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p, is_rev); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + is_poly = brw_ELSE(p, is_poly); + { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } + brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *vertex_loop; + struct brw_instruction *next_test; + struct brw_instruction *prev_test; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + vertex_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + /* IS_NEGATIVE(prev) */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + prev_test = brw_IF(p, BRW_EXECUTE_1); + { + /* IS_POSITIVE(next) + */ + 
brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + + } + prev_test = brw_ELSE(p, prev_test); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + /* IS_NEGATIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + /* Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. + */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + } + brw_ENDIF(p, prev_test); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, vertex_loop); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, 
get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + c->reg.nr_verts, + brw_imm_ud(3)); + + /* && (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop, *if_insn; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + + brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); + } + brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + } + brw_ENDIF(p, do_clip); +} + +static void brw_clip_test( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, 
t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +#else + #warning "disabled" +#endif +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c new file mode 100644 index 0000000000..b774a76dd6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_unfilled.c @@ -0,0 +1,477 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0, negate(v2)); + brw_ADD(p, f, v1, negate(v2)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + /* Do we have any colors to copy? + */ + if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && + !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. 
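The generated EU code above forms the triangle direction from the cross product of two edges and then tests only the sign of its z component, both for culling and for deciding whether the back-face colors should replace the front-face ones. A minimal scalar sketch of that arithmetic, assuming already-projected screen-space x/y coordinates (struct vec2 and the helper names are illustrative, not part of the driver):

#include <stdbool.h>

struct vec2 { float x, y; };

/* z component of cross(v0 - v2, v1 - v2): its sign encodes the winding */
static float tri_direction(struct vec2 v0, struct vec2 v1, struct vec2 v2)
{
   float ex = v0.x - v2.x, ey = v0.y - v2.y;
   float fx = v1.x - v2.x, fy = v1.y - v2.y;
   return ex * fy - ey * fx;
}

/* cull_direction() kills the thread when the sign matches the culled
 * winding: dir.z >= 0 when fill_ccw == CLIP_CULL, dir.z < 0 otherwise.
 */
static bool should_cull(float dir_z, bool cull_ccw)
{
   return cull_ccw ? (dir_z >= 0.0f) : (dir_z < 0.0f);
}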
+ */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + unsigned i; + + for (i = 0; i < 3; i++) { + if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + float iz = 1.0 / dir.z; + float ac = dir.x * iz; + float bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); + struct brw_reg z = get_element(pos, 2); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + boolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* 
Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + boolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + unsigned mode, + boolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. 
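emit_lines() above walks the clipped vertex list once (after an optional first pass that applies the depth offset), closes the ring by copying inlist[0] into inlist[nr_verts], and emits one two-vertex line strip per edge whose edge flag is non-zero. A plain-C sketch of that walk; emit_vertex() is a made-up stand-in for brw_clip_emit_vue(), and the primitive/header tokens are the ones used elsewhere in this file:

static void emit_lines_sketch(unsigned short *inlist, unsigned nr_verts,
                              const float *edgeflag, /* indexed by vertex handle */
                              void (*emit_vertex)(unsigned short vert,
                                                  unsigned header))
{
   unsigned i;

   /* Close the polygon so edge i always runs from inlist[i] to inlist[i+1] */
   inlist[nr_verts] = inlist[0];

   for (i = 0; i < nr_verts; i++) {
      if (edgeflag[inlist[i]] != 0.0f) {
         emit_vertex(inlist[i],     (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
         emit_vertex(inlist[i + 1], (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
      }
   }
}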
+ */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + + assert(c->offset[VERT_RESULT_EDGE]); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c new file mode 100644 index 0000000000..6d58ceafff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_util.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +#else + #warning "disabled" +#endif +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + unsigned i; + + /* Just copy the vertex header: + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) 
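brw_clip_project_position() above is the usual perspective divide: W is replaced by its reciprocal (the rhw) and x/y/z are scaled by it, after which brw_clip_project_vertex() copies the result into the vertex header. The same step in scalar C, as an illustration of the math rather than the align-16 register code:

static void project_position(float pos[4])
{
   float rhw = 1.0f / pos[3];   /* brw_math_invert() on the W element */
   pos[0] *= rhw;               /* value.xyz *= value.rhw             */
   pos[1] *= rhw;
   pos[2] *= rhw;
   pos[3]  = rhw;               /* W now carries 1/W                  */
}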
+ */ + for (i = 0; i < c->nr_attrs; i++) { + unsigned delta = i*16 + 32; + + if (delta == c->offset[VERT_RESULT_EDGE]) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + unsigned delta = i*16 + 32; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +#else + #warning "disabled" +#endif +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header) +{ + struct brw_compile *p = &c->func; + unsigned start = c->last_mrf; + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 0, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. 
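The attribute loop in brw_clip_interp_vertex() above blends every 4-float attribute of the new vertex as new = attr0 + t*attr1 - t*attr0, i.e. (1-t)*attr0 + t*attr1, with the edge flag either forced to 1 or copied from the source vertex. A scalar sketch of that blend (names are illustrative):

static void interp_attr(float dst[4], const float a0[4], const float a1[4],
                        float t)
{
   int i;

   for (i = 0; i < 4; i++)
      dst[i] = a0[i] + t * a1[i] - t * a0[i];   /* == (1-t)*a0 + t*a1 */
}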
+ */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ) +{ +#if 0 + struct brw_compile *p = &c->func; + + if (c->offset[VERT_RESULT_COL0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + + if (c->offset[VERT_RESULT_COL1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + + if (c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c new file mode 100644 index 0000000000..96920df008 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -0,0 +1,114 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_vs.h" +#include "brw_tex_layout.h" +#include "brw_winsys.h" + +#include "pipe/p_winsys.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +#ifndef BRW_DEBUG +int BRW_DEBUG = (0); +#endif + + +static void brw_destroy(struct pipe_context *pipe) +{ + struct brw_context *brw = brw_context(pipe); + + if(brw->winsys->destroy) + brw->winsys->destroy(brw->winsys); + + FREE(brw); +} + + +static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + int x, y, w, h; + /* FIXME: corny... */ + + x = 0; + y = 0; + w = ps->width; + h = ps->height; + + pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); +} + + +struct pipe_context *brw_create(struct pipe_screen *screen, + struct brw_winsys *brw_winsys, + unsigned pci_id) +{ + struct brw_context *brw; + + debug_printf("%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); + + brw = CALLOC_STRUCT(brw_context); + if (brw == NULL) + return NULL; + + brw->winsys = brw_winsys; + brw->pipe.winsys = screen->winsys; + brw->pipe.screen = screen; + + brw->pipe.destroy = brw_destroy; + brw->pipe.clear = brw_clear; + + brw_init_surface_functions(brw); + brw_init_texture_functions(brw); + brw_init_state_functions(brw); + brw_init_flush_functions(brw); + brw_init_draw_functions( brw ); + + + brw_init_state( brw ); + + brw->pci_id = pci_id; + brw->dirty = ~0; + brw->hardware_dirty = ~0; + + memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); + + return &brw->pipe; +} + diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h new file mode 100644 index 0000000000..3079485180 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -0,0 +1,684 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "tgsi/tgsi_scan.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. 
+ * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + debug_printf(__VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + debug_printf(__VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + int id; +}; + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ + int id; +}; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned stride; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_nblocksy; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. 
Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. 
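struct brw_tracked_state below pairs a set of dirty bits with an update() hook; an atom only needs to run when one of the flags it declares is currently raised in brw->state.dirty. A sketch of the driving loop, which presumably lives in brw_validate_state() rather than in this hunk (the atoms array here is hypothetical):

static void run_atoms_sketch(struct brw_context *brw,
                             const struct brw_tracked_state **atoms,
                             unsigned nr_atoms)
{
   unsigned i;

   for (i = 0; i < nr_atoms; i++) {
      const struct brw_tracked_state *atom = atoms[i];

      if ((atom->dirty.brw   & brw->state.dirty.brw) ||
          (atom->dirty.cache & brw->state.dirty.cache))
         atom->update(brw);
   }
}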
+ */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + unsigned sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + unsigned num_samplers; + unsigned num_textures; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; + + struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned 
nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c new file mode 100644 index 0000000000..824ee7fd6d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -0,0 +1,369 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_batch.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS + +/* Partition the CURBE between the various users of constant values: + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ + /* CACHE_NEW_WM_PROG */ + unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); + + /* BRW_NEW_VERTEX_PROGRAM */ + unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); + unsigned nr_clip_regs = 0; + unsigned total_regs; + +#if 0 + /* BRW_NEW_CLIP ? */ + if (brw->attribs.Transform->ClipPlanesEnabled) { + unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + nr_clip_regs = align(nr_planes * 4, 16); + } +#endif + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* This can happen - what to do? Probably rather than falling + * back, the best thing to do is emit programs which code the + * constants as immediate values. Could do this either as a static + * cap on WM and VS, or adaptively. + * + * Unfortunately, this is currently dependent on the results of the + * program generation process (in the case of wm), so this would + * introduce the need to re-generate programs in the event of a + * curbe allocation failure. + */ + /* Max size is 32 - just large enough to + * hold the 128 parameters allowed by + * the fragment and vertex program + * api's. It's not clear what happens + * when both VP and FP want to use 128 + * parameters, though. 
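A worked example of the partitioning computed just below, assuming the fragment and vertex programs each round up to 16 after the align() calls above and no user clip planes are enabled:

   nr_fp_regs = 16, nr_clip_regs = 0, nr_vp_regs = 16

   wm_start   = 0     wm_size   = 16
   clip_start = 16    clip_size = 0
   vs_start   = 16    vs_size   = 16
   total_size = 32    (satisfies the assert(total_regs <= 32) below)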
+ */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs != brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + unsigned reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + +#if 0 + if (0) + DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); +#endif + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_VS), + .cache = CACHE_NEW_WM_PROG + }, + .update = calculate_curbe_offsets +}; + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +void brw_upload_constant_buffer_state(struct brw_context *brw) +{ + struct brw_constant_buffer_state cbs; + memset(&cbs, 0, sizeof(cbs)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cbs.header.opcode = CMD_CONST_BUFFER_STATE; + cbs.header.length = sizeof(cbs)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cbs.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cbs); +} + + +static float fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. + */ +static void upload_constant_buffer(struct brw_context *brw) +{ + struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; + unsigned sz = brw->curbe.total_size; + unsigned bufsz = sz * sizeof(float); + float *buf; + unsigned i; + + + if (sz == 0) { + struct brw_constant_buffer cb; + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 0; + cb.bits0.buffer_length = 0; + cb.bits0.buffer_address = 0; + BRW_BATCH_STRUCT(brw, &cb); + + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + + return; + } + + buf = (float *)malloc(bufsz); + + memset(buf, 0, bufsz); + + if (brw->curbe.wm_size) { + unsigned offset = brw->curbe.wm_start * 16; + + /* First the constant buffer constants: + */ + + /* Then any internally generated constants: + */ + for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) + buf[offset + i] = brw->wm.prog_data->internal_const[i]; + + assert(brw->wm.prog_data->max_const == + brw->wm.prog_data->nr_internal_consts); + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. 
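The outcode computation that comment refers to can be pictured as a plane-equation test per clip plane: a vertex is outside plane i when dot(plane[i], position) is negative, and bits of this kind are what brw_clip_init_clipmask() later shifts into the planemask. A generic scalar sketch of the idea, not the VS code this driver generates:

static unsigned compute_outcode(const float planes[][4], unsigned nr_planes,
                                const float pos[4])
{
   unsigned outcode = 0;
   unsigned i;

   for (i = 0; i < nr_planes; i++) {
      float d = planes[i][0] * pos[0] + planes[i][1] * pos[1] +
                planes[i][2] * pos[2] + planes[i][3] * pos[3];
      if (d < 0.0f)
         outcode |= 1u << i;   /* vertex is outside plane i */
   }
   return outcode;
}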
+ */ + if (brw->curbe.clip_size) { + unsigned offset = brw->curbe.clip_start * 16; + unsigned j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: BRW_NEW_CLIP: + */ + for (j = 0; j < brw->attribs.Clip.nr; j++) { + buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; + i++; + } + } + + + if (brw->curbe.vs_size) { + unsigned offset = brw->curbe.vs_start * 16; + /*unsigned nr = vp->max_const;*/ + const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; + struct pipe_winsys *ws = brw->pipe.winsys; + /* FIXME: buffer size is num_consts + num_immediates */ + if (brw->vs.prog_data->num_consts) { + /* map the vertex constant buffer and copy to curbe: */ + void *data = ws->buffer_map(ws, cbuffer->buffer, 0); + /* FIXME: this is wrong. the cbuffer->size currently + * represents size of consts + immediates. so if we'll + * have both we'll copy over the end of the buffer + * with the subsequent memcpy */ + memcpy(&buf[offset], data, cbuffer->size); + ws->buffer_unmap(ws, cbuffer->buffer); + offset += cbuffer->size; + } + /*immediates*/ + if (brw->vs.prog_data->num_imm) { + memcpy(&buf[offset], brw->vs.prog_data->imm_buf, + brw->vs.prog_data->num_imm * 4 * sizeof(float)); + } + } + + if (1) { + for (i = 0; i < sz; i+=4) + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + free(buf); +/* return; */ + } + else { + if (brw->curbe.last_buf) + free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + + if (!brw_pool_alloc(pool, + bufsz, + 1 << 6, + &brw->curbe.gs_offset)) { + debug_printf("out of GS memory for curbe\n"); + assert(0); + return; + } + + + /* Copy data to the buffer: + */ + brw->winsys->buffer_subdata_typed(brw->winsys, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + BRW_CONSTANT_BUFFER ); + } + + /* TODO: only emit the constant_buffer packet when necessary, ie: + - contents have changed + - offset has changed + - hw requirements due to other packets emitted. + */ + { + struct brw_constant_buffer cb; + + memset(&cb, 0, sizeof(cb)); + + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 1; + cb.bits0.buffer_length = sz - 1; + cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. 
+ * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ + BRW_BATCH_STRUCT(brw, &cb); + } +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs. This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + */ +const struct brw_tracked_state brw_constant_buffer = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CONSTANTS | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS), + .cache = (CACHE_NEW_WM_PROG) + }, + .update = upload_constant_buffer +}; + diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h new file mode 100644 index 0000000000..9379a397f6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_defines.h @@ -0,0 +1,852 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define 
_3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define 
BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define 
BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define 
BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define 
BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
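+                                           (presumably the four-packed 8-bit float immediate form used by brw_imm_vf()/brw_imm_vf4() in brw_eu.h below)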
*/ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define 
BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c 
b/src/gallium/drivers/i965simple/brw_draw.c new file mode 100644 index 0000000000..7598e3dc8a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + +static const int reduced_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. + */ +static void brw_set_prim(struct brw_context *brw, int prim) +{ + PRINT("PRIM: %d\n", prim); + + /* Slight optimization to avoid the GS program when not needed: + */ + if (prim == PIPE_PRIM_QUAD_STRIP && + brw->attribs.Raster->flatshade && + brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && + brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) + prim = PIPE_PRIM_TRIANGLE_STRIP; + + if (prim != brw->primitive) { + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + if (reduced_prim[prim] != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim[prim]; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + + brw_validate_state(brw); + } + +} + + +static unsigned trim(int prim, unsigned length) +{ + if (prim == PIPE_PRIM_QUAD_STRIP) + return length > 3 ? 
(length - length % 2) : 0; + else if (prim == PIPE_PRIM_QUADS) + return length - length % 4; + else + return length; +} + + + +static boolean brw_emit_prim( struct brw_context *brw, + boolean indexed, + unsigned start, + unsigned count ) + +{ + struct brw_3d_primitive prim_packet; + + if (BRW_DEBUG & DEBUG_PRIMS) + PRINT("PRIM: %d %d %d\n", brw->primitive, start, count); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim[brw->primitive]; + prim_packet.header.indexed = indexed; + + prim_packet.verts_per_instance = trim(brw->primitive, count); + prim_packet.start_vert_location = start; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = 0; + + if (prim_packet.verts_per_instance == 0) + return TRUE; + + return brw_batchbuffer_data( brw->winsys, + &prim_packet, + sizeof(prim_packet) ); +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. + */ +static boolean brw_try_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, + unsigned count ) +{ + struct brw_context *brw = brw_context(pipe); + + /* Set the first primitive ahead of validate_state: + */ + brw_set_prim(brw, mode); + + /* Upload index, vertex data: + */ + if (index_buffer && + !brw_upload_indices( brw, index_buffer, index_size, start, count )) + return FALSE; + + if (!brw_upload_vertex_buffers(brw)) + return FALSE; + + if (!brw_upload_vertex_elements( brw )) + return FALSE; + + /* XXX: Need to separate validate and upload of state. + */ + if (brw->state.dirty.brw) + brw_validate_state( brw ); + + if (!brw_emit_prim(brw, index_buffer != NULL, + start, count)) + return FALSE; + + return TRUE; +} + + + +static boolean brw_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, count )) + { + /* flush ? */ + + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, + count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + + +static boolean brw_draw_arrays( struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + /* flush ? */ + + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + + +void brw_init_draw_functions( struct brw_context *brw ) +{ + brw->pipe.draw_arrays = brw_draw_arrays; + brw->pipe.draw_elements = brw_draw_elements; +} + + diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h new file mode 100644 index 0000000000..62fe0d5d0e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.h @@ -0,0 +1,55 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "pipe/p_context.h" + +struct brw_context; + + + +void brw_init_draw_functions( struct brw_context *brw ); + + +boolean brw_upload_vertices( struct brw_context *brw, + unsigned min_index, + unsigned max_index ); + +boolean brw_upload_indices(struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count); + +boolean brw_upload_vertex_buffers( struct brw_context *brw ); +boolean brw_upload_vertex_elements( struct brw_context *brw ); + +unsigned brw_translate_surface_format( unsigned id ); + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c new file mode 100644 index 0000000000..7c20ea52af --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + + +struct brw_array_state { + union header_union header; + + struct { + union { + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } bits; + unsigned dword; + } vb0; + + struct pipe_buffer *buffer; + unsigned offset; + + unsigned max_index; + unsigned instance_data_step_rate; + + } vb[BRW_VBP_MAX]; +}; + + + +unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return 
BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static unsigned get_index_type(int type) +{ + switch (type) { + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + + +boolean brw_upload_vertex_buffers( struct brw_context *brw ) +{ + struct brw_array_state vbp; + unsigned nr_enabled = 0; + unsigned i; + + memset(&vbp, 0, sizeof(vbp)); + + /* This is a hardware limit: + */ + + for (i = 0; i < BRW_VEP_MAX; i++) + { + if (brw->vb.vbo_array[i] == NULL) { + nr_enabled = i; + break; + } + + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; + vbp.vb[i].vb0.bits.pad = 0; + vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; + vbp.vb[i].vb0.bits.vb_index = i; + vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; + vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; + vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; + } + + + vbp.header.bits.length = (1 + nr_enabled * 4) - 2; + vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + + BEGIN_BATCH(vbp.header.bits.length+2, 0); + OUT_BATCH( vbp.header.dword ); + + for (i = 0; i < nr_enabled; i++) { + OUT_BATCH( vbp.vb[i].vb0.dword ); + OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ, + vbp.vb[i].offset); + OUT_BATCH( vbp.vb[i].max_index ); + OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + } + ADVANCE_BATCH(); + return TRUE; +} + + + +boolean brw_upload_vertex_elements( struct brw_context *brw ) +{ + struct brw_vertex_element_packet vep; + + unsigned i; + unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; + + memset(&vep, 0, sizeof(vep)); + + for (i = 0; i < nr_enabled; i++) + vep.ve[i] = brw->vb.inputs[i]; + + + vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; + vep.header.opcode = CMD_VERTEX_ELEMENT; + brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); + + return TRUE; +} + +boolean brw_upload_indices( struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count) +{ + /* Emit the indexbuffer 
packet: + */ + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(ib_size); + ib.header.bits.cut_index_enable = 0; + + + BEGIN_BATCH(4, 0); + OUT_BATCH( ib.header.dword ); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } + return TRUE; +} diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c new file mode 100644 index 0000000000..e2002d1821 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.c @@ -0,0 +1,130 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? 
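+ * (Unverified guess: the flag register appears to hold one enable bit per
+ * channel, so a SIMD16 compare would presumably want 0xffff where the code
+ * below uses 0xff as the "all channels enabled" default.)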
+ */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, unsigned value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, unsigned value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_compile *p ) +{ + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + + +const unsigned *brw_get_program( struct brw_compile *p, + unsigned *sz ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + *sz = p->nr_insn * sizeof(struct brw_instruction); + return (const unsigned *)p->store; +} + diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h new file mode 100644 index 0000000000..23151ae9ed --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.h @@ -0,0 +1,888 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* src only, align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! */ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 1200 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + unsigned nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + boolean single_program_flow; +}; + + + +static __inline int type_sz( unsigned type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +static __inline struct brw_reg brw_reg( unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + + struct brw_reg reg; + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? 
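+    * (For the example above: r5.3<0;1,0> as a float sits at byte offset 12 of
+    * its 16-byte group, which is the W channel once viewed in align16 form.)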
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +static __inline struct brw_reg brw_vec16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + +static __inline struct brw_reg brw_vec8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec4_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec2_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + TGSI_WRITEMASK_XY); +} + +static __inline struct brw_reg brw_vec1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); +} + + +static __inline struct brw_reg retype( struct brw_reg reg, + unsigned type ) +{ + reg.type = type; + return reg; +} + +static __inline struct brw_reg suboffset( struct brw_reg reg, + unsigned delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static __inline struct brw_reg offset( struct brw_reg reg, + unsigned delta ) +{ + reg.nr += delta; + return reg; +} + + +static __inline struct brw_reg byte_offset( struct brw_reg reg, + unsigned bytes ) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +static __inline struct brw_reg brw_uw16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_imm_reg( unsigned type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +static __inline struct brw_reg brw_imm_f( float f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +static __inline struct brw_reg brw_imm_d( int d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +static __inline struct brw_reg brw_imm_ud( unsigned ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +static __inline struct brw_reg brw_imm_uw( ushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw; + return imm; +} + +static __inline 
struct brw_reg brw_imm_w( short w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w; + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/* Vector of eight signed half-byte values: + */ +static __inline struct brw_reg brw_imm_v( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/* Vector of four 8-bit float values: + */ +static __inline struct brw_reg brw_imm_vf( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static __inline struct brw_reg brw_imm_vf4( unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static __inline struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + + +static __inline struct brw_reg brw_vec1_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec4_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static __inline struct brw_reg brw_vec2_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_uw8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static __inline struct brw_reg brw_address_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static __inline struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + TGSI_WRITEMASK_XYZW); /* NOTE! 
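+                                          (this note refers to the comment above
+                                           this function: if/else break in align16
+                                           mode unless swizzle and writemask are XYZW)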
*/ +} + +static __inline struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static __inline struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static __inline struct brw_reg brw_mask_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static __inline struct brw_reg brw_message_reg( unsigned nr ) +{ + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static __inline unsigned cvt( unsigned val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static __inline struct brw_reg stride( struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride ) +{ + + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static __inline struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static __inline struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static __inline struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static __inline struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static __inline struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + +static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static __inline struct brw_reg brw_swizzle( struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, + unsigned x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static __inline struct brw_reg brw_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static __inline struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static __inline struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg deref_4f(struct 
brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +static __inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, unsigned value ); +void brw_set_saturate( struct brw_compile *p, unsigned value ); +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); +void brw_set_compression_control( struct brw_compile *p, boolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); + +void brw_init_compile( struct brw_compile *p ); +const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + 
unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c new file mode 100644 index 0000000000..4a94ddefa6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_debug.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + debug_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else { + debug_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_eu_emit.c b/src/gallium/drivers/i965simple/brw_eu_emit.c new file mode 100644 index 0000000000..400a80b6fb --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_emit.c @@ -0,0 +1,1080 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if 
(reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, + unsigned msg_length, + unsigned response_length, + unsigned function, + unsigned integer_type, + boolean low_precision, + boolean saturate, + unsigned dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + 
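+   /* Descriptive note: the remaining fields describe the extended-math
+    * message itself - the operand layout (scalar vs. vector), the message
+    * and response lengths, and the math shared-function target;
+    * end_of_thread is always 0 for math messages.
+    */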
insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean end_of_thread, + boolean complete, + unsigned offset, + unsigned swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_write_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned msg_length, + unsigned pixel_scoreboard_clear, + unsigned response_length, + unsigned end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned msg_control, + unsigned msg_type, + unsigned target_cache, + unsigned msg_length, + unsigned response_length, + unsigned end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.dp_read.binding_table_index = binding_table_index; + insn->bits3.dp_read.msg_control = msg_control; + insn->bits3.dp_read.msg_type = msg_type; + insn->bits3.dp_read.target_cache = target_cache; + insn->bits3.dp_read.response_length = response_length; + insn->bits3.dp_read.msg_length = msg_length; + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void brw_set_sampler_message( struct brw_instruction *insn, + unsigned binding_table_index, + unsigned sampler, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + unsigned opcode ) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditonalmod) { + 
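+      /* destreg__conditonalmod shares its encoding with the SEND destination
+       * register field and is set per instruction (e.g. by brw_CMP and the
+       * send helpers), so clear it in the template rather than letting it
+       * leak into the next instruction.
+       */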
p->current->header.destreg__conditonalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). 
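+ * In single_program_flow mode brw_IF instead emits a predicated ADD to the
+ * IP register, brw_ELSE emits a plain ADD and brw_ENDIF emits nothing, so
+ * the flag stack is not used at all.
+ *
+ * Illustrative usage sketch (editorial annotation, not from the original
+ * sources):
+ *
+ *    struct brw_instruction *if_insn = brw_IF(p, BRW_EXECUTE_8);
+ *    ... emit the 'then' instructions ...
+ *    struct brw_instruction *else_insn = brw_ELSE(p, if_insn);
+ *    ... emit the 'else' instructions ...
+ *    brw_ENDIF(p, else_insn);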
+ */ +struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. 
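+       * jump_count is expressed in instructions relative to the patched
+       * instruction, and pop_count presumably gives the number of flag-stack
+       * entries released when the branch is taken.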
+ */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + 
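+   /* The jump is typically emitted as brw_JMPI(p, brw_ip_reg(), brw_ip_reg(),
+    * brw_imm_d(0)) and patched here once the landing point is known; src1
+    * must therefore be an immediate.
+    */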
assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = (landing - jmp_insn) - 1; +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditonalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/* Invert 8 values + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/* Use 2 send instructions to invert 16 elements + */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ) +{ + struct brw_instruction *insn; + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ?
2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditonalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + unsigned msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(insn, + 255, /* bti */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } + +} + + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? 
*/ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + 255, /* bti */ + 3, /* msg_control */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot) +{ + boolean need_stall = 0; + + if(writemask == 0) { +/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. 
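+    * The workaround (the need_stall path below) emits a MOV of the last
+    * written destination register onto itself after the sample, forcing the
+    * outstanding dependency to be resolved.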
+ */ + if (writemask != TGSI_WRITEMASK_XYZW) { + unsigned dst_offset = 0; + unsigned i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; +/* debug_printf("need stall %x %x\n", newmask , writemask); */ + } + else { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & TGSI_WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot); + } + + if (need_stall) + { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, FALSE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_urb_message(insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c new file mode 100644 index 0000000000..3a65b141f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count) +{ + unsigned i; + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c new file mode 100644 index 0000000000..e6001c30d9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_batch.h" + + +static void brw_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct brw_context *brw = brw_context(pipe); + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush.flags |= BRW_FLUSH_READ_CACHE; + + BRW_BATCH_STRUCT(brw, &flush); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH( fence ); +} + + + +void brw_init_flush_functions( struct brw_context *brw ) +{ + brw->pipe.flush = brw_flush; +} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c new file mode 100644 index 0000000000..de60868ccc --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.c @@ -0,0 +1,196 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static void compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + struct brw_gs_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case PIPE_PRIM_QUADS: + brw_gs_quads( &c ); + break; + case PIPE_PRIM_QUAD_STRIP: + brw_gs_quad_strip( &c ); + break; + case PIPE_PRIM_LINE_LOOP: + brw_gs_lines( &c ); + break; + case PIPE_PRIM_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return; + } + break; + case PIPE_PRIM_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return; + } + break; + case PIPE_PRIM_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return; + } + break; + default: + return; + } + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->gs.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_GS_PROG], + key, sizeof(*key), + &brw->gs.prog_data, + &brw->gs.prog_gs_offset); +} + + +static const int gs_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->attrs = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? */ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. 
+ */ +static void upload_gs_prog( struct brw_context *brw ) +{ + struct brw_gs_prog_key key; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (brw->gs.prog_active) { + if (!search_cache(brw, &key)) + compile_gs_prog( brw, &key ); + } +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .brw = BRW_NEW_PRIMITIVE, + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_gs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h new file mode 100644 index 0000000000..f09141c6aa --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.h @@ -0,0 +1,75 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +struct brw_gs_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned hint_gs_always:1; + unsigned need_gs_prog:1; + unsigned pad:26; +}; + +struct brw_gs_compile { + struct brw_compile func; + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_GS_VERTS]; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; +}; + +#define ATTR_SIZE (4*4) + +void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c new file mode 100644 index 0000000000..c3cc90b10f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_emit.c @@ -0,0 +1,148 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + boolean last, + unsigned header) +{ + struct brw_compile *p = &c->func; + boolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a seperate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a seperate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c new file mode 100644 index 0000000000..5b8016b2e9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + + +static void upload_gs_unit( struct brw_context *brw ) +{ + struct brw_gs_unit_state gs; + + memset(&gs, 0, sizeof(gs)); + + /* CACHE_NEW_GS_PROG */ + if (brw->gs.prog_active) { + gs.thread0.grf_reg_count = + align(brw->gs.prog_data->total_grf, 16) / 16 - 1; + gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; + gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } + else { + gs.thread0.grf_reg_count = 0; + gs.thread0.kernel_start_pointer = 0; + gs.thread3.urb_entry_read_length = 1; + } + + /* BRW_NEW_URB_FENCE */ + gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; + gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (BRW_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + /* CONSTANT */ + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + + + brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); +} + + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .update = upload_gs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c new file mode 100644 index 0000000000..be812c5da9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -0,0 +1,488 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_batch.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; + bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; + bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; + bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .brw = BRW_NEW_BLEND, + .cache = 0 + }, + .update = upload_blend_constant_color +}; + + +/*********************************************************************** + * Drawing rectangle + */ +static void upload_drawing_rect(struct brw_context *brw) +{ + struct brw_drawrect bdr; + + memset(&bdr, 0, sizeof(bdr)); + bdr.header.opcode = CMD_DRAW_RECT; + bdr.header.length = sizeof(bdr)/4 - 2; + bdr.xmin = 0; + bdr.ymin = 0; + bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; + bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; + bdr.xorg = 0; + bdr.yorg = 0; + + /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted + * uncached in brw_draw.c: + */ + BRW_BATCH_STRUCT(brw, &bdr); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_drawing_rect +}; + +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. + */ +static void upload_binding_table_pointers(struct brw_context *brw) +{ + struct brw_binding_table_pointers btp; + memset(&btp, 0, sizeof(btp)); + + btp.header.opcode = CMD_BINDING_TABLE_PTRS; + btp.header.length = sizeof(btp)/4 - 2; + btp.vs = 0; + btp.gs = 0; + btp.clp = 0; + btp.sf = 0; + btp.wm = brw->wm.bind_ss_offset; + + BRW_CACHED_BATCH_STRUCT(brw, &btp); +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .brw = 0, + .cache = CACHE_NEW_SURF_BIND + }, + .update = upload_binding_table_pointers, +}; + + +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. + */ +static void upload_pipelined_state_pointers(struct brw_context *brw ) +{ + struct brw_pipelined_state_pointers psp; + memset(&psp, 0, sizeof(psp)); + + psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; + psp.header.length = sizeof(psp)/4 - 2; + + psp.vs.offset = brw->vs.state_gs_offset >> 5; + psp.sf.offset = brw->sf.state_gs_offset >> 5; + psp.wm.offset = brw->wm.state_gs_offset >> 5; + psp.cc.offset = brw->cc.state_gs_offset >> 5; + + /* GS gets turned on and off regularly. Need to re-emit URB fence + * after this occurs. 
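+ * When the packet emitted below actually changes, BRW_NEW_PSP is flagged so that dependent state is revalidated.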
+ */ + if (brw->gs.prog_active) { + psp.gs.offset = brw->gs.state_gs_offset >> 5; + psp.gs.enable = 1; + } + + if (0) { + psp.clp.offset = brw->clip.state_gs_offset >> 5; + psp.clp.enable = 1; + } + + + if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) + brw->state.dirty.brw |= BRW_NEW_PSP; +} + +const struct brw_tracked_state brw_pipelined_state_pointers = { + .dirty = { + .brw = 0, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_pipelined_state_pointers +}; + +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ + upload_pipelined_state_pointers(brw); + brw_upload_urb_fence(brw); + brw_upload_constant_buffer_state(brw); +} + + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .brw = BRW_NEW_URB_FENCE, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_psp_urb_cbs +}; + +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. + */ +static void upload_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; + + BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); + if (depth_surface == NULL) { + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + unsigned int format; + + assert(depth_surface->block.width == 1); + assert(depth_surface->block.height == 1); + switch (depth_surface->block.size) { + case 2: + format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) + format = BRW_DEPTHFORMAT_D32_FLOAT; + else + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + OUT_BATCH((depth_surface->stride - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | +// (depth_surface->region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(depth_surface->buffer, + PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((depth_surface->stride/depth_surface->block.size - 1) << 6) | + ((depth_surface->height - 1) << 19)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_depthbuffer, +}; + + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static void upload_polygon_stipple(struct brw_context *brw) +{ + struct brw_polygon_stipple bps; + unsigned i; + + memset(&bps, 0, sizeof(bps)); + bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps.header.length = sizeof(bps)/4-2; + + /* XXX: state tracker should send *all* state down initially! 
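+ * Until it does, PolygonStipple may still be NULL here, which is why the copy below is guarded.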
+ */ + if (brw->attribs.PolygonStipple) + for (i = 0; i < 32; i++) + bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ + + BRW_CACHED_BATCH_STRUCT(brw, &bps); +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + */ + +static void upload_line_stipple(struct brw_context *brw) +{ + struct brw_line_stipple bls; + float tmp; + int tmpi; + + memset(&bls, 0, sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + + bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; + bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; + + tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; + tmpi = tmp * (1<<13); + + + bls.bits1.inverse_repeat_count = tmpi; + + BRW_CACHED_BATCH_STRUCT(brw, &bls); +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_line_stipple +}; + + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ + struct brw_pipe_control pc; + + return; + + memset(&pc, 0, sizeof(pc)); + + pc.header.opcode = CMD_PIPE_CONTROL; + pc.header.length = sizeof(pc)/4 - 2; + pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + + pc.header.instruction_state_cache_flush_enable = 1; + + pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + + BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_pipe_control +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static void upload_invarient_state( struct brw_context *brw ) +{ + { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; + BRW_BATCH_STRUCT(brw, &flush); + } + + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + ps.header.opcode = CMD_PIPELINE_SELECT; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. 
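+ * (A clamp value of 0.0 is treated as "no clamping".)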
+ */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + vfs.opcode = CMD_VF_STATISTICS; + if (BRW_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } + + + { + struct brw_polygon_stipple_offset bpso; + + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.x_offset = 0; + bpso.bits0.y_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_invarient_state +}; + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. + */ +static void upload_state_base_address( struct brw_context *brw ) +{ + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, + PIPE_BUFFER_USAGE_GPU_READ, + 1); /* General state base address */ + OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, + PIPE_BUFFER_USAGE_GPU_READ, + 1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); +} + + +const struct brw_tracked_state brw_state_base_address = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_state_base_address +}; diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h new file mode 100644 index 0000000000..9e885c3b3b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_reg.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) + +/* Stalls command execution waiting for the given events to have occurred. */ +#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +/* Primitive dispatch on 830-945 */ +#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) + +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) + +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c new file mode 100644 index 0000000000..ab7cd624b2 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "util/u_string.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_tex_layout.h" + + +static const char * +brw_get_vendor( struct pipe_screen *screen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +brw_get_name( struct pipe_screen *screen ) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_screen(screen)->pci_id) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + default: + chipset = "unknown"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); + return buffer; +} + + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static boolean +brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ +#if 0 + /* XXX: This is broken -- rewrite if still needed. 
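+ * Everything from here to the matching #else is compiled out; the simple format switch after it is what actually runs.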
*/ + static const unsigned tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_L8A8_UNORM, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8_Z24, + }; + + + /* Actually a lot more than this - add later: + */ + static const unsigned render_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + }; + + /* + */ + static const unsigned z_stencil_supported[] = { + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + }; + + switch (type) { + case PIPE_RENDER_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + case PIPE_TEX_FORMAT: + *numFormats = Elements(tex_supported); + return render_supported; + + case PIPE_Z_STENCIL_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + default: + *numFormats = 0; + return NULL; + } +#else + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + }; + return FALSE; +#endif +} + + +static void +brw_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); + + if (!brwscreen) + return NULL; + + brwscreen->pci_id = pci_id; + + brwscreen->screen.winsys = winsys; + + brwscreen->screen.destroy = brw_destroy_screen; + + brwscreen->screen.get_name = brw_get_name; + brwscreen->screen.get_vendor = brw_get_vendor; + brwscreen->screen.get_param = brw_get_param; + brwscreen->screen.get_paramf = brw_get_paramf; + brwscreen->screen.is_format_supported = brw_is_format_supported; + + brw_init_screen_texture_funcs(&brwscreen->screen); + + return &brwscreen->screen; +} diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/i965simple/brw_screen.h new file mode 100644 index 0000000000..d3c70387e6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ + struct pipe_screen screen; + + uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + + +extern struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c new file mode 100644 index 0000000000..b82a2e143b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" +#include "tgsi/tgsi_parse.h" + + +static void compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + struct brw_sf_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.key = *key; + + + c.nr_attrs = c.key.vp_output_count; + c.nr_attr_regs = (c.nr_attrs+1)/2; + + c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ + c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + + /* Which primitive? Or all three? 
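+ * Only one of the setup variants below is emitted per program; SF_UNFILLED_TRIS is not implemented in this driver.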
+ */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c ); + break; + case SF_POINTS: + c.nr_verts = 1; + brw_emit_point_setup( &c ); + break; + + case SF_UNFILLED_TRIS: + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->sf.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_SF_PROG], + key, sizeof(*key), + &brw->sf.prog_data, + &brw->sf.prog_gs_offset); +} + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_sf_prog( struct brw_context *brw ) +{ + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct brw_sf_prog_key key; + struct tgsi_parse_context parse; + int i, done = 0; + + + memset(&key, 0, sizeof(key)); + + /* Populate the key, noting state dependencies: + */ + /* CACHE_NEW_VS_PROG */ + key.vp_output_count = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_FS */ + key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; + + + /* BRW_NEW_REDUCED_PRIMITIVE */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_TRIANGLES: +// if (key.attrs & (1<<VERT_RESULT_EDGE)) +// key.primitive = SF_UNFILLED_TRIS; +// else + key.primitive = SF_TRIANGLES; + break; + case PIPE_PRIM_LINES: + key.primitive = SF_LINES; + break; + case PIPE_PRIM_POINTS: + key.primitive = SF_POINTS; + break; + } + + + + /* Scan fp inputs to figure out what interpolation modes are + * required for each incoming vp output. There is an assumption + * that the state tracker makes sure there is a 1:1 linkage between + * these sets of attributes (XXX: position??) + */ + tgsi_parse_init( &parse, fs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; + int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; + //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; + //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + + debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); + + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + for (i = first; i <= last; i++) + key.const_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_LINEAR: + for (i = first; i <= last; i++) + key.linear_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + for (i = first; i <= last; i++) + key.persp_mask |= (1 << i); + break; + default: + break; + } + + /* Also need stuff for flat shading, twosided color. + */ + + } + break; + default: + done = 1; + break; + } + } + + /* Hack: Adjust for position. Optimize away when not required (ie + * for perspective interpolation). 
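+ * Position occupies attribute slot 0, so each mask is shifted up by one and position itself is marked for linear interpolation.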
+ */ + key.persp_mask <<= 1; + key.linear_mask <<= 1; + key.linear_mask |= 1; + key.const_mask <<= 1; + + debug_printf("key.persp_mask: %x\n", key.persp_mask); + debug_printf("key.linear_mask: %x\n", key.linear_mask); + debug_printf("key.const_mask: %x\n", key.const_mask); + + +// key.do_point_sprite = brw->attribs.Point->PointSprite; +// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +// key.do_flat_shading = (brw->attribs.Raster->flatshade); +// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +// if (key.do_twoside_color) +// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); + + + if (!search_cache(brw, &key)) + compile_sf_prog( brw, &key ); +} + + +const struct brw_tracked_state brw_sf_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = upload_sf_prog +}; + + + +#if 0 +/* Build a struct like the one we'd like the state tracker to pass to + * us. + */ +static void update_sf_linkage( struct brw_context *brw ) +{ + const struct brw_vertex_program *vs = brw->attribs.VertexProgram; + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct pipe_setup_linkage state; + struct tgsi_parse_context parse; + + int i, j; + int nr_vp_outputs = 0; + int done = 0; + + struct { + unsigned semantic:8; + unsigned semantic_index:16; + } fp_semantic[32], vp_semantic[32]; + + memset(&state, 0, sizeof(state)); + + state.fp_input_count = 0; + + + + + + + assert(state.fp_input_count == fs->program.num_inputs); + + + /* Then scan vp outputs + */ + done = 0; + tgsi_parse_init( &parse, vs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; + + for (i = first; i < last; i++) { + vp_semantic[i].semantic = + parse.FullToken.FullDeclaration.Semantic.SemanticName; + vp_semantic[i].semantic_index = + parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + } + + assert(last > nr_vp_outputs); + nr_vp_outputs = last; + } + break; + default: + done = 1; + break; + } + } + + + /* Now match based on semantic information. 
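+ * Each fp input gets the vp output with the same semantic name/index; COLOR inputs also record a BCOLOR output for the two-sided path.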
+ */ + for (i = 0; i< state.fp_input_count; i++) { + for (j = 0; j < nr_vp_outputs; j++) { + if (fp_semantic[i].semantic == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].vp_output = j; + } + } + if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { + for (j = 0; j < nr_vp_outputs; j++) { + if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && + fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { + state.fp_input[i].bf_vp_output = j; + } + } + } + } + + if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { + brw->sf.linkage = state; + brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; + } +} + + +const struct brw_tracked_state brw_sf_linkage = { + .dirty = { + .brw = (BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = update_sf_linkage +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h new file mode 100644 index 0000000000..b7ada47560 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.h @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_SF_H +#define BRW_SF_H + +#include "brw_context.h" +#include "brw_eu.h" + + +#define SF_POINTS 0 +#define SF_LINES 1 +#define SF_TRIANGLES 2 +#define SF_UNFILLED_TRIS 3 + + + +struct brw_sf_prog_key { + unsigned vp_output_count:5; + unsigned fp_input_count:5; + + unsigned primitive:2; + unsigned do_twoside_color:1; + unsigned do_flat_shading:1; + unsigned frontface_ccw:1; + unsigned do_point_sprite:1; + + /* Interpolation masks; + */ + unsigned linear_mask; + unsigned persp_mask; + unsigned const_mask; + + +// int SpriteOrigin; +}; + +struct brw_sf_point_tex { + boolean CoordReplace; +}; + +struct brw_sf_compile { + struct brw_compile func; + struct brw_sf_prog_key key; + struct brw_sf_prog_data prog_data; + + struct brw_reg pv; + struct brw_reg det; + struct brw_reg dx0; + struct brw_reg dx2; + struct brw_reg dy0; + struct brw_reg dy2; + + /* z and 1/w passed in seperately: + */ + struct brw_reg z[3]; + struct brw_reg inv_w[3]; + + /* The vertices: + */ + struct brw_reg vert[3]; + + /* Temporaries, allocated after last vertex reg. 
+ */ + struct brw_reg inv_det; + struct brw_reg a1_sub_a0; + struct brw_reg a2_sub_a0; + struct brw_reg tmp; + + struct brw_reg m1Cx; + struct brw_reg m2Cy; + struct brw_reg m3C0; + + unsigned nr_verts; + unsigned nr_attrs; + unsigned nr_attr_regs; + unsigned nr_setup_attrs; + unsigned nr_setup_regs; +#if 0 + ubyte attr_to_idx[VERT_RESULT_MAX]; + ubyte idx_to_attr[VERT_RESULT_MAX]; + struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; +#endif +}; + + +void brw_emit_tri_setup( struct brw_sf_compile *c ); +void brw_emit_line_setup( struct brw_sf_compile *c ); +void brw_emit_point_setup( struct brw_sf_compile *c ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c ); +void brw_emit_anyprim_setup( struct brw_sf_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c new file mode 100644 index 0000000000..78d6fa5e9e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_emit.c @@ -0,0 +1,382 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + + +/*********************************************************************** + * Triangle setup. + */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + unsigned reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. 
+ */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + +#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ + FRAG_BIT_COL0 | \ + FRAG_BIT_COL1) + +static boolean calculate_masks( struct brw_sf_compile *c, + unsigned reg, + ushort *pc, + ushort *pc_persp, + ushort *pc_linear) +{ + boolean is_last_attr = (reg == c->nr_setup_regs - 1); + unsigned persp_mask = c->key.persp_mask; + unsigned linear_mask = c->key.linear_mask; + + debug_printf("persp_mask: %x\n", persp_mask); + debug_printf("linear_mask: %x\n", linear_mask); + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << (reg*2))) + *pc_persp = 0xf; + + if (linear_mask & (1 << (reg*2))) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << (reg*2+1))) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << (reg*2+1))) + *pc_linear |= 0xf0; + } + + debug_printf("pc: %x\n", *pc); + debug_printf("pc_persp: %x\n", *pc_persp); + debug_printf("pc_linear: %x\n", *pc_linear); + + + return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + debug_printf("%s START ==============\n", __FUNCTION__); + + c->nr_verts = 3; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + ushort pc = 0, pc_persp = 0, pc_linear = 0; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* 
Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } + + debug_printf("%s DONE ==============\n", __FUNCTION__); + +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + + c->nr_verts = 2; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + c->nr_verts = 1; + alloc_regs(c); + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. 
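+ * Same four-register write as the line and triangle paths; only the constant term m3C0 changes per attribute pair here.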
+ */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c new file mode 100644 index 0000000000..2a5de61c21 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -0,0 +1,181 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +static void upload_sf_vp(struct brw_context *brw) +{ + struct brw_sf_viewport sfv; + + memset(&sfv, 0, sizeof(sfv)); + + + /* BRW_NEW_VIEWPORT */ + { + const float *scale = brw->attribs.Viewport.scale; + const float *trans = brw->attribs.Viewport.translate; + + sfv.viewport.m00 = scale[0]; + sfv.viewport.m11 = scale[1]; + sfv.viewport.m22 = scale[2]; + sfv.viewport.m30 = trans[0]; + sfv.viewport.m31 = trans[1]; + sfv.viewport.m32 = trans[2]; + } + + /* _NEW_SCISSOR */ + sfv.scissor.xmin = brw->attribs.Scissor.minx; + sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; + sfv.scissor.ymin = brw->attribs.Scissor.miny; + sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; + + brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .brw = (BRW_NEW_SCISSOR | + BRW_NEW_VIEWPORT), + .cache = 0 + }, + .update = upload_sf_vp +}; + +static void upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_state sf; + memset(&sf, 0, sizeof(sf)); + + /* CACHE_NEW_SF_PROG */ + sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; + sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; + sf.thread3.urb_entry_read_offset = 1; + + /* BRW_NEW_URB_FENCE */ + sf.thread4.nr_urb_entries = 
brw->urb.nr_sf_entries; + sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; + sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + if (BRW_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; + sf.sf5.viewport_transform = 1; + + /* BRW_NEW_RASTER */ + if (brw->attribs.Raster->scissor) + sf.sf6.scissor = 1; + +#if 0 + if (brw->attribs.Polygon->FrontFace == GL_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + + if (brw->attribs.Polygon->CullFlag) { + switch (brw->attribs.Polygon->CullFaceMode) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + default: + assert(0); + break; + } + } + else + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif + + sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (brw->attribs.Raster->line_smooth) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + sf.sf6.point_rast_rule = 1; /* opengl conventions */ + + sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; + sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); + sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + sf.sf7.line_last_pixel_enable = 0; + + /* Set bias for OpenGL rasterization rules: + */ + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + + brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); +} + + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_URB_FENCE), + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) + }, + .update = upload_sf_unit +}; + + diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c new file mode 100644 index 0000000000..86d877d7ef --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -0,0 +1,48 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + + +/** + * XXX this obsolete new and no longer compiled. 
+ */ +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ) +{ + struct tgsi_parse_context parse; + int done = 0; + + tgsi_parse_init( &parse, tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned last = decl->DeclarationRange.Last; + + // Broken by crazy wpos init: + //assert( info->nr_regs[decl->Declaration.File] <= last); + + info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], + last+1); + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c new file mode 100644 index 0000000000..af46cb546f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
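The brw_shader_info() scan above simply records, per register file, one more than the highest register index seen in the TGSI declarations. A minimal, self-contained sketch of that accumulation under assumed simplified types (hypothetical names reg_decl and count_regs; the real code walks TGSI parser tokens):

   struct reg_decl { unsigned file; unsigned first; unsigned last; };

   static void count_regs(const struct reg_decl *decls, unsigned n,
                          unsigned nr_regs[])
   {
      unsigned i;
      for (i = 0; i < n; i++) {
         unsigned needed = decls[i].last + 1;   /* indices are zero-based */
         if (nr_regs[decls[i].file] < needed)   /* same effect as MAX2() */
            nr_regs[decls[i].file] = needed;
      }
   }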
+ * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_samplers && + !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) + return; + + memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); + memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + brw->num_samplers = num; + + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void 
brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_fp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_fp->info2); +#endif + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + + FREE((void *) brw_fp->program.tokens); + FREE(brw_fp); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + + FREE((void *) brw_vp->program.tokens); + FREE(brw_vp); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); +} + +static void brw_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + /* flush ? 
*/ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(count <= PIPE_MAX_ATTRIBS); + + for (i = 0; i < count; i++) { + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = elements[i].src_offset; + el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; + + el.ve1.dst_offset = i * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (elements[i].nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[i] = el; + } +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_textures && + !memcmp(brw->attribs.Texture, texture, num * + sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + texture[i]); + + for (i = num; i < brw->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + NULL); + + brw->num_textures = num; + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + 
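   /* The create/bind/delete hooks wired up in this function follow Gallium's
    * constant-state-object pattern: each create_*_state callback returns an
    * opaque heap-allocated copy of the template (the DUP() macro above),
    * bind_*_state just stores the pointer and flags a dirty bit, and
    * delete_*_state frees it.  A hedged usage sketch, with a hypothetical
    * context "pipe" and blend template "templ":
    *
    *    void *cso = pipe->create_blend_state(pipe, &templ);
    *    pipe->bind_blend_state(pipe, cso);        -- marks BRW_NEW_BLEND
    *    ...draw...
    *    pipe->delete_blend_state(pipe, cso);      -- plain free()
    */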
brw->pipe.bind_sampler_states = brw_bind_sampler_states; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_textures = brw_set_sampler_textures; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; + brw->pipe.set_vertex_elements = brw_set_vertex_elements; +} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h new file mode 100644 index 0000000000..de0a6371b8 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -0,0 +1,151 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "brw_context.h" +#include "brw_winsys.h" + + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_constant_buffer_state; +const struct brw_tracked_state brw_constant_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple_offset; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct brw_tracked_state brw_active_vertprog; +const struct brw_tracked_state brw_tnl_vertprog; +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_clear_surface_cache; +const struct brw_tracked_state brw_clear_batch_cache; + +/*********************************************************************** + * brw_state_cache.c + */ +unsigned brw_cache_data(struct brw_cache *cache, + const void *data ); + +unsigned brw_cache_data_sz(struct brw_cache *cache, + const void *data, + unsigned data_sz); + +unsigned brw_upload_cache( struct brw_cache *cache, + const void *key, + unsigned key_sz, + const void *data, + unsigned data_sz, + const void *aux, + void *aux_return ); + +boolean brw_search_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + void *aux_return, + unsigned *offset_return); + +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); + +static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw, + enum brw_cache_id id) +{ + return brw->cache[id].pool->buffer; +} + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +boolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + unsigned sz ); + +void brw_destroy_batch_cache( struct brw_context *brw ); + + +/*********************************************************************** + * brw_state_pool.c + */ +void brw_init_pools( struct brw_context *brw ); +void brw_destroy_pools( struct 
brw_context *brw ); + +boolean brw_pool_alloc( struct brw_mem_pool *pool, + unsigned size, + unsigned alignment, + unsigned *offset_return); + +void brw_pool_fence( struct brw_context *brw, + struct brw_mem_pool *pool, + unsigned fence ); + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ); + +void brw_clear_all_caches( struct brw_context *brw ); +void brw_invalidate_pools( struct brw_context *brw ); +void brw_clear_batch_cache_flush( struct brw_context *brw ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c new file mode 100644 index 0000000000..43a1c89fc4 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -0,0 +1,113 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_state.h" +#include "brw_winsys.h" + +#include "util/u_memory.h" + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. 
+ */ +boolean brw_cached_batch_struct( struct brw_context *brw, + const void *data, + unsigned sz ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + struct header *newheader = (struct header *)data; + + if (brw->emit_state_always) { + brw_batchbuffer_data(brw->winsys, data, sz); + return TRUE; + } + + while (item) { + if (item->header->opcode == newheader->opcode) { + if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) + return FALSE; + if (item->sz != sz) { + FREE(item->header); + item->header = MALLOC(sz); + item->sz = sz; + } + goto emit; + } + item = item->next; + } + + assert(!item); + item = CALLOC_STRUCT(brw_cached_batch_item); + item->header = MALLOC(sz); + item->sz = sz; + item->next = brw->cached_batch_items; + brw->cached_batch_items = item; + +emit: + memcpy(item->header, newheader, sz); + brw_batchbuffer_data(brw->winsys, data, sz); + return TRUE; +} + +static void clear_batch_cache( struct brw_context *brw ) +{ + struct brw_cached_batch_item *item = brw->cached_batch_items; + + while (item) { + struct brw_cached_batch_item *next = item->next; + free((void *)item->header); + free(item); + item = next; + } + + brw->cached_batch_items = NULL; + + + brw_clear_all_caches(brw); + + brw_invalidate_pools(brw); +} + +void brw_clear_batch_cache_flush( struct brw_context *brw ) +{ + clear_batch_cache(brw); + +/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ + + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ + clear_batch_cache(brw); +} diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c new file mode 100644 index 0000000000..094248fa69 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -0,0 +1,443 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
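brw_cached_batch_struct() above amounts to a per-opcode "last packet emitted" cache: a byte-identical packet is dropped, anything else replaces the cached copy and is written to the batch buffer. A minimal sketch of the same idea, assuming a cache slot already selected by opcode (hypothetical names cached_packet and emit_packet_once):

   struct cached_packet {
      unsigned sz;
      void *copy;          /* last emitted contents for this opcode */
   };

   /* Returns TRUE if the packet actually had to be (re)emitted. */
   static boolean emit_packet_once(struct cached_packet *slot,
                                   const void *packet, unsigned sz)
   {
      if (slot->copy && slot->sz == sz &&
          memcmp(slot->copy, packet, sz) == 0)
         return FALSE;                  /* identical: skip the emit */

      if (slot->sz != sz) {
         FREE(slot->copy);
         slot->copy = MALLOC(sz);
         slot->sz = sz;
      }
      memcpy(slot->copy, packet, sz);
      /* ...then hand the packet to brw_batchbuffer_data() as above... */
      return TRUE;
   }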
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_state.h" + +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + +#include "util/u_memory.h" + + + +/*********************************************************************** + * Check cache for uploaded version of struct, else upload new one. + * Fail when memory is exhausted. + * + * XXX: FIXME: Currently search is so slow it would be quicker to + * regenerate the data every time... + */ + +static unsigned hash_key( const void *key, unsigned key_size ) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} + +static struct brw_cache_item *search_cache( struct brw_cache *cache, + unsigned hash, + const void *key, + unsigned key_size) +{ + struct brw_cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && + c->key_size == key_size && + memcmp(c->key, key, key_size) == 0) + return c; + } + + return NULL; +} + + +static void rehash( struct brw_cache *cache ) +{ + struct brw_cache_item **items; + struct brw_cache_item *c, *next; + unsigned size, i; + + size = cache->size * 3; + items = (struct brw_cache_item**) MALLOC(size * sizeof(*items)); + memset(items, 0, size * sizeof(*items)); + + for (i = 0; i < cache->size; i++) + for (c = cache->items[i]; c; c = next) { + next = c->next; + c->next = items[c->hash % size]; + items[c->hash % size] = c; + } + + FREE(cache->items); + cache->items = items; + cache->size = size; +} + + +boolean brw_search_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + void *aux_return, + unsigned *offset_return) +{ + struct brw_cache_item *item; + unsigned addr = 0; + unsigned hash = hash_key(key, key_size); + + item = search_cache(cache, hash, key, key_size); + + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + *offset_return = addr = item->offset; + } + + if (item == NULL || addr != cache->last_addr) { + cache->brw->state.dirty.cache |= 1<<cache->id; + cache->last_addr = addr; + } + + return item != NULL; +} + +unsigned brw_upload_cache( struct brw_cache *cache, + const void *key, + unsigned key_size, + const void *data, + unsigned data_size, + const void *aux, + void *aux_return ) +{ + unsigned offset; + struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); + unsigned hash = hash_key(key, key_size); + void *tmp = MALLOC(key_size + cache->aux_size); + + if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { + /* Should not be possible: + */ + debug_printf("brw_pool_alloc failed\n"); + exit(1); + } + + memcpy(tmp, key, key_size); + + if (cache->aux_size) + memcpy(tmp+key_size, aux, cache->aux_size); + + item->key = tmp; + item->hash = hash; + item->key_size = key_size; + item->offset = offset; + item->data_size = data_size; + + if (++cache->n_items > cache->size * 1.5) + rehash(cache); + + hash %= cache->size; + item->next = cache->items[hash]; + cache->items[hash] = item; + + if (aux_return) { + assert(cache->aux_size); + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + } + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", + cache->name, + data_size, + (void*)cache->pool->buffer, + 
offset); + + /* Copy data to the buffer: + */ + cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys, + cache->pool->buffer, + offset, + data_size, + data, + cache->id); + + cache->brw->state.dirty.cache |= 1<<cache->id; + cache->last_addr = offset; + + return offset; +} + +/* This doesn't really work with aux data. Use search/upload instead + */ +unsigned brw_cache_data_sz(struct brw_cache *cache, + const void *data, + unsigned data_size) +{ + unsigned addr; + + if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { + addr = brw_upload_cache(cache, + data, data_size, + data, data_size, + NULL, NULL); + } + + return addr; +} + +unsigned brw_cache_data(struct brw_cache *cache, + const void *data) +{ + return brw_cache_data_sz(cache, data, cache->key_size); +} + +enum pool_type { + DW_SURFACE_STATE, + DW_GENERAL_STATE +}; + +static void brw_init_cache( struct brw_context *brw, + const char *name, + unsigned id, + unsigned key_size, + unsigned aux_size, + enum pool_type pool_type) +{ + struct brw_cache *cache = &brw->cache[id]; + cache->brw = brw; + cache->id = id; + cache->name = name; + cache->items = NULL; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + CALLOC(cache->size, sizeof(struct brw_cache_item)); + + + cache->key_size = key_size; + cache->aux_size = aux_size; + switch (pool_type) { + case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; + case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; + default: assert(0); break; + } +} + +void brw_init_caches( struct brw_context *brw ) +{ + + brw_init_cache(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0, + DW_GENERAL_STATE); + + brw_init_cache(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data), + DW_GENERAL_STATE); + + brw_init_cache(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + 
sizeof(struct brw_surface_state), + 0, + DW_SURFACE_STATE); + + brw_init_cache(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + sizeof(struct brw_surface_binding_table), + 0, + DW_SURFACE_STATE); +} + + +/* When we lose hardware context, need to invalidate the surface cache + * as these structs must be explicitly re-uploaded. They are subject + * to fixup by the memory manager as they contain absolute agp + * offsets, so we need to ensure there is a fresh version of the + * struct available to receive the fixup. + * + * XXX: Need to ensure that there aren't two versions of a surface or + * bufferobj with different backing data active in the same buffer at + * once? Otherwise the cache could confuse them. Maybe better not to + * cache at all? + * + * --> Isn't this the same as saying need to ensure batch is flushed + * before new data is uploaded to an existing buffer? We + * already try to make sure of that. + */ +static void clear_cache( struct brw_cache *cache ) +{ + struct brw_cache_item *c, *next; + unsigned i; + + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { + next = c->next; + free((void *)c->key); + free(c); + } + cache->items[i] = NULL; + } + + cache->n_items = 0; +} + +void brw_clear_all_caches( struct brw_context *brw ) +{ + int i; + + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); + + if (brw->curbe.last_buf) { + FREE(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + } + + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; +} + + + + + +void brw_destroy_caches( struct brw_context *brw ) +{ + unsigned i; + + for (i = 0; i < BRW_MAX_CACHE; i++) + clear_cache(&brw->cache[i]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c new file mode 100644 index 0000000000..007dc8f9de --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -0,0 +1,138 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +/** @file brw_state_pool.c + * Implements the state pool allocator. + * + * For the 965, we create two state pools for state cache entries. 
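The state cache above is keyed on the contents of the state struct itself, and brw_cache_data() is just a search-then-upload wrapper around it. A hedged usage sketch with a hypothetical SF viewport "sfv", essentially what brw_cache_data_sz() does internally:

   unsigned offset;

   if (!brw_search_cache(&brw->cache[BRW_SF_VP], &sfv, sizeof(sfv),
                         NULL, &offset))
      offset = brw_upload_cache(&brw->cache[BRW_SF_VP],
                                &sfv, sizeof(sfv),   /* key  */
                                &sfv, sizeof(sfv),   /* data */
                                NULL, NULL);         /* no aux data */

   /* "offset" is a location in the general state pool, later referenced
    * from unit state, e.g. sf.sf5.sf_viewport_state_offset = offset >> 5. */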
Objects + * will be allocated into the pools depending on which state base address + * their pointer is relative to in other 965 state. + * + * The state pools are relatively simple: As objects are allocated, increment + * the offset to allocate space. When the pool is "full" (rather, close to + * full), we reset the pool and reset the state cache entries that point into + * the pool. + */ + +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "brw_context.h" +#include "brw_state.h" + +boolean brw_pool_alloc( struct brw_mem_pool *pool, + unsigned size, + unsigned alignment, + unsigned *offset_return) +{ + unsigned fixup = align(pool->offset, alignment) - pool->offset; + + size = align(size, 4); + + if (pool->offset + fixup + size >= pool->size) { + debug_printf("%s failed\n", __FUNCTION__); + assert(0); + exit(0); + } + + pool->offset += fixup; + *offset_return = pool->offset; + pool->offset += size; + + return TRUE; +} + +static +void brw_invalidate_pool( struct brw_mem_pool *pool ) +{ + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); + + pool->offset = 0; + + brw_clear_all_caches(pool->brw); +} + + +static void brw_init_pool( struct brw_context *brw, + unsigned pool_id, + unsigned size ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pool->size = size; + pool->brw = brw; + + pool->buffer = pipe_buffer_create(brw->pipe.screen, + 4096, + 0 /* DRM_BO_FLAG_MEM_TT */, + size); +} + +static void brw_destroy_pool( struct brw_context *brw, + unsigned pool_id ) +{ + struct brw_mem_pool *pool = &brw->pool[pool_id]; + + pipe_buffer_reference( pool->brw->pipe.screen, + &pool->buffer, + NULL ); +} + + +void brw_pool_check_wrap( struct brw_context *brw, + struct brw_mem_pool *pool ) +{ + if (pool->offset > (pool->size * 3) / 4) { + brw->state.dirty.brw |= BRW_NEW_SCENE; + } + +} + +void brw_init_pools( struct brw_context *brw ) +{ + brw_init_pool(brw, BRW_GS_POOL, 0x80000); + brw_init_pool(brw, BRW_SS_POOL, 0x80000); +} + +void brw_destroy_pools( struct brw_context *brw ) +{ + brw_destroy_pool(brw, BRW_GS_POOL); + brw_destroy_pool(brw, BRW_SS_POOL); +} + + +void brw_invalidate_pools( struct brw_context *brw ) +{ + brw_invalidate_pool(&brw->pool[BRW_GS_POOL]); + brw_invalidate_pool(&brw->pool[BRW_SS_POOL]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c new file mode 100644 index 0000000000..bac9161b5f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
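brw_pool_alloc() above is a plain bump allocator: advance the current offset to the requested alignment, round the size up to a dword multiple, and hand back the aligned start. A minimal sketch of the same arithmetic, assuming power-of-two alignments (hypothetical name pool_bump_alloc; the real function aborts rather than returning failure when the pool is exhausted):

   static unsigned pool_bump_alloc(unsigned *offset, unsigned pool_size,
                                   unsigned size, unsigned alignment)
   {
      unsigned start = (*offset + alignment - 1) & ~(alignment - 1);

      size = (size + 3) & ~3;              /* keep allocations dword sized */
      assert(start + size < pool_size);    /* see brw_pool_check_wrap() */

      *offset = start + size;
      return start;                        /* offset of the new object */
   }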
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" + +#include "util/u_memory.h" + +/* This is used to initialize brw->state.atoms[]. We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +const struct brw_tracked_state *atoms[] = +{ + &brw_vs_prog, + &brw_gs_prog, + &brw_clip_prog, + &brw_sf_prog, + &brw_wm_prog, + + /* Once all the programs are done, we know how large urb entry + * sizes need to be and can decide if we need to change the urb + * layout. + */ + &brw_curbe_offsets, + &brw_recalculate_urb_fence, + + + &brw_cc_vp, + &brw_cc_unit, + + &brw_wm_surfaces, /* must do before samplers */ + &brw_wm_samplers, + + &brw_wm_unit, + &brw_sf_vp, + &brw_sf_unit, + &brw_vs_unit, /* always required, enabled or not */ + &brw_clip_unit, + &brw_gs_unit, + + /* Command packets: + */ + &brw_invarient_state, + &brw_state_base_address, + &brw_pipe_control, + + &brw_binding_table_pointers, + &brw_blend_constant_color, + + &brw_drawing_rect, + &brw_depthbuffer, + + &brw_polygon_stipple, + &brw_line_stipple, + + &brw_psp_urb_cbs, + + &brw_constant_buffer +}; + + +void brw_init_state( struct brw_context *brw ) +{ + brw_init_pools(brw); + brw_init_caches(brw); + + brw->state.dirty.brw = ~0; + brw->emit_state_always = 0; +} + + +void brw_destroy_state( struct brw_context *brw ) +{ + brw_destroy_caches(brw); + brw_destroy_batch_cache(brw); + brw_destroy_pools(brw); +} + +/*********************************************************************** + */ + +static boolean check_state( const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + return ((a->brw & b->brw) || + (a->cache & b->cache)); +} + +static void accumulate_state( struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + a->brw |= b->brw; + a->cache |= b->cache; +} + + +static void xor_states( struct brw_state_flags *result, + const struct brw_state_flags *a, + const struct brw_state_flags *b ) +{ + result->brw = a->brw ^ b->brw; + result->cache = a->cache ^ b->cache; +} + + +/*********************************************************************** + * Emit all state: + */ +void brw_validate_state( struct brw_context *brw ) +{ + struct brw_state_flags *state = &brw->state.dirty; + unsigned i; + + if (brw->emit_state_always) + state->brw |= ~0; + + if (state->cache == 0 && + state->brw == 0) + return; + + if (brw->state.dirty.brw & BRW_NEW_SCENE) + brw_clear_batch_cache_flush(brw); + + if (BRW_DEBUG) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. 
+ */ + struct brw_state_flags examined, prev; + memset(&examined, 0, sizeof(examined)); + prev = *state; + + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + struct brw_state_flags generated; + + assert(atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) { + atom->update( brw ); + } + + accumulate_state(&examined, &atom->dirty); + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, &prev, state); + assert(!check_state(&examined, &generated)); + prev = *state; + } + } + else { + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; + + assert(atom->dirty.brw || + atom->dirty.cache); + assert(atom->update); + + if (check_state(state, &atom->dirty)) + atom->update( brw ); + } + } + + memset(state, 0, sizeof(*state)); +} diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h new file mode 100644 index 0000000000..bbb087e95d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_structs.h @@ -0,0 +1,1348 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
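The non-debug path of brw_validate_state() above reduces to a dirty-bit intersection over the atom list: an atom's update() hook runs only when one of the BRW_NEW_* or CACHE_NEW_* bits it declared in .dirty is currently set. A hedged sketch of that core loop (hypothetical helper name run_atoms; the real code also handles the BRW_NEW_SCENE batch-cache flush and the debug ordering check):

   static void run_atoms(struct brw_context *brw,
                         const struct brw_tracked_state **atoms,
                         unsigned count)
   {
      struct brw_state_flags *state = &brw->state.dirty;
      unsigned i;

      for (i = 0; i < count; i++) {
         if ((state->brw & atoms[i]->dirty.brw) ||
             (state->cache & atoms[i]->dirty.cache))
            atoms[i]->update(brw);       /* re-emit just this piece of state */
      }

      memset(state, 0, sizeof(*state));  /* everything is clean again */
   }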
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include "pipe/p_compiler.h" + +/* Command packets: + */ +struct header +{ + unsigned length:16; + unsigned opcode:16; +}; + + +union header_union +{ + struct header bits; + unsigned dword; +}; + +struct brw_3d_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:3; + unsigned wc_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned operation:2; + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } dest; + + unsigned dword2; + unsigned dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + unsigned length:8; + unsigned pad:2; + unsigned topology:5; + unsigned indexed:1; + unsigned opcode:16; + } header; + + unsigned verts_per_instance; + unsigned start_vert_location; + unsigned instance_count; + unsigned start_instance_location; + unsigned base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + unsigned flags:4; + unsigned pad:12; + unsigned opcode:16; +}; + +struct brw_vf_statistics +{ + unsigned statistics_enable:1; + unsigned pad:15; + unsigned opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + unsigned vs; + unsigned gs; + unsigned clp; + unsigned sf; + unsigned wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + float blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + unsigned pitch:18; + unsigned format:3; + unsigned pad:4; + unsigned depth_offset_disable:1; + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad2:1; + unsigned surface_type:3; + } bits; + unsigned dword; + } dword1; + + unsigned dword2_base_addr; + + union { + struct { + unsigned pad:1; + unsigned mipmap_layout:1; + unsigned lod:4; + unsigned width:13; + unsigned height:13; + } bits; + unsigned dword; + } dword3; + + union { + struct { + unsigned pad:12; + unsigned min_array_element:9; + unsigned depth:11; + } bits; + unsigned dword; + } dword4; +}; + +struct brw_drawrect +{ + struct header header; + unsigned xmin:16; + unsigned ymin:16; + unsigned xmax:16; + unsigned ymax:16; + unsigned xorg:16; + unsigned yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + unsigned length:8; + unsigned index_format:2; + unsigned cut_index_enable:1; + unsigned pad:5; + unsigned opcode:16; + } bits; + unsigned dword; + + } header; + + unsigned buffer_start; + unsigned buffer_end; +}; + + +struct brw_line_stipple +{ + struct header header; + + struct + { + unsigned pattern:16; + unsigned pad:16; + } bits0; + + struct + { + unsigned repeat_count:9; + unsigned pad:7; + unsigned inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + unsigned pad:5; + unsigned offset:27; + } vs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } gs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } clp; + + struct + { 
+ unsigned pad:5; + unsigned offset:27; + } sf; + + struct + { + unsigned pad:5; + unsigned offset:27; + } wm; + + struct + { + unsigned pad:5; + unsigned offset:27; /* KW: check me! */ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + unsigned y_offset:5; + unsigned pad:3; + unsigned x_offset:5; + unsigned pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + unsigned stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + unsigned pipeline_select:1; + unsigned pad:15; + unsigned opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:2; + unsigned instruction_state_cache_flush_enable:1; + unsigned write_cache_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned post_sync_operation:2; + + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } bits1; + + unsigned data0; + unsigned data1; +}; + + +struct brw_urb_fence +{ + struct + { + unsigned length:8; + unsigned vs_realloc:1; + unsigned gs_realloc:1; + unsigned clp_realloc:1; + unsigned sf_realloc:1; + unsigned vfe_realloc:1; + unsigned cs_realloc:1; + unsigned pad:2; + unsigned opcode:16; + } header; + + struct + { + unsigned vs_fence:10; + unsigned gs_fence:10; + unsigned clp_fence:10; + unsigned pad:2; + } bits0; + + struct + { + unsigned sf_fence:10; + unsigned vf_fence:10; + unsigned cs_fence:10; + unsigned pad:2; + } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ + struct header header; + + struct + { + unsigned nr_urb_entries:3; + unsigned pad:1; + unsigned urb_entry_size:5; + unsigned pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + unsigned length:8; + unsigned valid:1; + unsigned pad:7; + unsigned opcode:16; + } header; + + struct + { + unsigned buffer_length:6; + unsigned buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned general_state_address:27; + } bits0; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned surface_state_address:27; + } bits1; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned indirect_object_state_address:27; + } bits2; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned general_state_upper_bound:20; + } bits3; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + unsigned prefetch_count:3; + unsigned pad:3; + unsigned prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + unsigned pad:4; + unsigned system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned pad0:1; + unsigned grf_reg_count:3; + unsigned pad1:2; + unsigned kernel_start_pointer:26; +}; + +struct thread1 +{ + unsigned ext_halt_exception_enable:1; + unsigned sw_exception_enable:1; + unsigned mask_stack_exception_enable:1; + unsigned timeout_exception_enable:1; + unsigned illegal_op_exception_enable:1; + unsigned pad0:3; + unsigned depth_coef_urb_read_offset:6; /* WM only */ + unsigned pad1:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned 
binding_table_entry_count:8; + unsigned pad3:5; + unsigned single_program_flow:1; +}; + +struct thread2 +{ + unsigned per_thread_scratch_space:4; + unsigned pad0:6; + unsigned scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned dispatch_grf_start_reg:4; + unsigned urb_entry_read_offset:6; + unsigned pad0:1; + unsigned urb_entry_read_length:6; + unsigned pad1:1; + unsigned const_urb_entry_read_offset:6; + unsigned pad2:1; + unsigned const_urb_entry_read_length:6; + unsigned pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + unsigned pad0:7; + unsigned sw_exception_enable:1; + unsigned pad1:3; + unsigned mask_stack_exception_enable:1; + unsigned pad2:1; + unsigned illegal_op_exception_enable:1; + unsigned pad3:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad4:5; + unsigned single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:9; + unsigned gs_output_stats:1; /* not always */ + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; /* may be less */ + unsigned pad3:6; + } thread4; + + struct + { + unsigned pad0:13; + unsigned clip_mode:3; + unsigned userclip_enable_flags:8; + unsigned userclip_must_clip:1; + unsigned pad1:1; + unsigned guard_band_enable:1; + unsigned viewport_z_clip_enable:1; + unsigned viewport_xy_clip_enable:1; + unsigned vertex_position_space:1; + unsigned api_mode:1; + unsigned pad2:1; + } clip5; + + struct + { + unsigned pad0:5; + unsigned clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct + { + unsigned pad0:3; + unsigned bf_stencil_pass_depth_pass_op:3; + unsigned bf_stencil_pass_depth_fail_op:3; + unsigned bf_stencil_fail_op:3; + unsigned bf_stencil_func:3; + unsigned bf_stencil_enable:1; + unsigned pad1:2; + unsigned stencil_write_enable:1; + unsigned stencil_pass_depth_pass_op:3; + unsigned stencil_pass_depth_fail_op:3; + unsigned stencil_fail_op:3; + unsigned stencil_func:3; + unsigned stencil_enable:1; + } cc0; + + + struct + { + unsigned bf_stencil_ref:8; + unsigned stencil_write_mask:8; + unsigned stencil_test_mask:8; + unsigned stencil_ref:8; + } cc1; + + + struct + { + unsigned logicop_enable:1; + unsigned pad0:10; + unsigned depth_write_enable:1; + unsigned depth_test_function:3; + unsigned depth_test:1; + unsigned bf_stencil_write_mask:8; + unsigned bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned pad0:8; + unsigned alpha_test_func:3; + unsigned alpha_test:1; + unsigned blend_enable:1; + unsigned ia_blend_enable:1; + unsigned pad1:1; + unsigned alpha_test_format:1; + unsigned pad2:16; + } cc3; + + struct + { + unsigned pad0:5; + unsigned cc_viewport_state_offset:27; + } cc4; + + struct + { + unsigned pad0:2; + unsigned ia_dest_blend_factor:5; + unsigned ia_src_blend_factor:5; + unsigned ia_blend_function:3; + unsigned statistics_enable:1; + unsigned logicop_func:4; + unsigned pad1:11; + unsigned dither_enable:1; + } cc5; + + struct + { + unsigned clamp_post_alpha_blend:1; + unsigned clamp_pre_alpha_blend:1; + unsigned clamp_range:2; + unsigned pad0:11; + unsigned y_dither_offset:2; + unsigned x_dither_offset:2; + unsigned dest_blend_factor:5; + unsigned src_blend_factor:5; + unsigned blend_function:3; + } cc6; + + struct 
{ + union { + float f; + ubyte ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:6; + unsigned pad3:1; + } thread4; + + struct + { + unsigned front_winding:1; + unsigned viewport_transform:1; + unsigned pad0:3; + unsigned sf_viewport_state_offset:27; + } sf5; + + struct + { + unsigned pad0:9; + unsigned dest_org_vbias:4; + unsigned dest_org_hbias:4; + unsigned scissor:1; + unsigned disable_2x2_trifilter:1; + unsigned disable_zero_pix_trifilter:1; + unsigned point_rast_rule:2; + unsigned line_endcap_aa_region_width:2; + unsigned line_width:4; + unsigned fast_scissor_disable:1; + unsigned cull_mode:2; + unsigned aa_enable:1; + } sf6; + + struct + { + unsigned point_size:11; + unsigned use_point_size_state:1; + unsigned subpixel_precision:1; + unsigned sprite_point:1; + unsigned pad0:11; + unsigned trifan_pv:2; + unsigned linestrip_pv:2; + unsigned tristrip_pv:2; + unsigned line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; + unsigned pad3:6; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned max_vp_index:4; + unsigned pad0:26; + unsigned reorder_enable:1; + unsigned pad1:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:4; + unsigned pad3:3; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } vs5; + + struct + { + unsigned vs_enable:1; + unsigned vert_cache_disable:1; + unsigned pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned stats_enable:1; + unsigned pad0:1; + unsigned sampler_count:3; + unsigned sampler_state_pointer:27; + } wm4; + + struct + { + unsigned enable_8_pix:1; + unsigned enable_16_pix:1; + unsigned enable_32_pix:1; + unsigned pad0:7; + unsigned legacy_global_depth_bias:1; + unsigned line_stipple:1; + unsigned depth_offset:1; + unsigned polygon_stipple:1; + unsigned line_aa_region_width:2; + unsigned line_endcap_aa_region_width:2; + unsigned early_depth_test:1; + unsigned thread_dispatch_enable:1; + unsigned program_uses_depth:1; + unsigned program_computes_depth:1; + unsigned program_uses_killpixel:1; + unsigned legacy_line_rast: 1; + unsigned pad1:1; + unsigned max_threads:6; + unsigned pad2:1; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; +}; + +struct brw_sampler_default_color { + float color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + unsigned shadow_function:3; + unsigned lod_bias:11; + unsigned min_filter:3; + 
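Each of the fixed-function unit state structures above (clip, CC, SF, GS, VS, WM) is laid out as a direct image of a hardware state record: every nested struct is a run of bitfields summing to exactly 32 bits, i.e. one dword. The checks below are an illustration only, not part of the driver; they assume the compiler packs each 32-bit bitfield group into a single dword, which is what these definitions rely on.

#define BRW_DWORD_SIZE_CHECK(name, dwords) \
   typedef char name##_wrong_size[(sizeof(struct name) == 4 * (dwords)) ? 1 : -1]

BRW_DWORD_SIZE_CHECK(brw_clip_unit_state, 11);  /* thread0..4, clip5, clip6 + 4 guardband floats */
BRW_DWORD_SIZE_CHECK(brw_cc_unit_state,    8);  /* cc0 .. cc7 */
BRW_DWORD_SIZE_CHECK(brw_sf_unit_state,    8);  /* thread0..4, sf5 .. sf7 */
BRW_DWORD_SIZE_CHECK(brw_gs_unit_state,    7);  /* thread0..4, gs5, gs6 */
BRW_DWORD_SIZE_CHECK(brw_vs_unit_state,    7);  /* thread0..4, vs5, vs6 */
BRW_DWORD_SIZE_CHECK(brw_wm_unit_state,    8);  /* thread0..3, wm4, wm5 + 2 depth-offset floats */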
unsigned mag_filter:3; + unsigned mip_filter:2; + unsigned base_level:5; + unsigned pad:1; + unsigned lod_preclamp:1; + unsigned default_color_mode:1; + unsigned pad0:1; + unsigned disable:1; + } ss0; + + struct + { + unsigned r_wrap_mode:3; + unsigned t_wrap_mode:3; + unsigned s_wrap_mode:3; + unsigned pad:3; + unsigned max_lod:10; + unsigned min_lod:10; + } ss1; + + + struct + { + unsigned pad:5; + unsigned default_color_pointer:27; + } ss2; + + struct + { + unsigned pad:19; + unsigned max_aniso:3; + unsigned chroma_key_mode:1; + unsigned chroma_key_index:2; + unsigned chroma_key_enable:1; + unsigned monochrome_filter_width:3; + unsigned monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... + */ +struct brw_surface_state +{ + struct { + unsigned cube_pos_z:1; + unsigned cube_neg_z:1; + unsigned cube_pos_y:1; + unsigned cube_neg_y:1; + unsigned cube_pos_x:1; + unsigned cube_neg_x:1; + unsigned pad:4; + unsigned mipmap_layout_mode:1; + unsigned vert_line_stride_ofs:1; + unsigned vert_line_stride:1; + unsigned color_blend:1; + unsigned writedisable_blue:1; + unsigned writedisable_green:1; + unsigned writedisable_red:1; + unsigned writedisable_alpha:1; + unsigned surface_format:9; + unsigned data_return_format:1; + unsigned pad0:1; + unsigned surface_type:3; + } ss0; + + struct { + unsigned base_addr; + } ss1; + + struct { + unsigned pad:2; + unsigned mip_count:4; + unsigned width:13; + unsigned height:13; + } ss2; + + struct { + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad:1; + unsigned pitch:18; + unsigned depth:11; + } ss3; + + struct { + unsigned pad:19; + unsigned min_array_elt:9; + unsigned min_lod:4; + } ss4; +}; + + + +struct brw_vertex_buffer_state +{ + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } vb0; + + unsigned start_addr; + unsigned max_index; +#if 1 + unsigned instance_data_step_rate; /* not included for sequential/random vertices? 
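The six floats in brw_sf_viewport are the non-trivial entries of the viewport matrix: m00/m11/m22 scale and m30/m31/m32 translate the post-divide NDC position into window coordinates. A small sketch of the mapping they encode (illustration only, not driver code):

static void apply_sf_viewport(const struct brw_sf_viewport *vp,
                              const float ndc[3], float win[3])
{
   win[0] = vp->viewport.m00 * ndc[0] + vp->viewport.m30;   /* x: scale + offset */
   win[1] = vp->viewport.m11 * ndc[1] + vp->viewport.m31;   /* y: scale + offset */
   win[2] = vp->viewport.m22 * ndc[2] + vp->viewport.m32;   /* z: scale + offset */
}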
*/ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + unsigned src_offset:11; + unsigned pad:5; + unsigned src_format:9; + unsigned pad0:1; + unsigned valid:1; + unsigned vertex_buffer_index:5; + } ve0; + + struct + { + unsigned dst_offset:8; + unsigned pad:8; + unsigned vfcomponent3:4; + unsigned vfcomponent2:4; + unsigned vfcomponent1:4; + unsigned vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned pad0:2; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad0:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned pad1:2; + unsigned dest_address_mode:1; + } da16; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned pad1:2; + unsigned dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } ia1; + + struct + { + unsigned 
src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } da16; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + struct brw_urb_immediate urb; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned pixel_scoreboard_clear:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + struct { + unsigned pad:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + int d; + unsigned ud; + } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c new file mode 100644 index 
0000000000..b89756c47b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -0,0 +1,124 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_tile.h" +#include "util/u_rect.h" + + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +brw_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert( dst != src ); + assert( dst->block.size == src->block.size ); + assert( dst->block.width == src->block.height ); + assert( dst->block.height == src->block.height ); + + if (0) { + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_CPU_READ ); + + pipe_copy_rect(dst_map, + &dst->block, + dst->stride, + dstx, dsty, + width, height, + src_map, + do_flip ? -(int) src->stride : src->stride, + srcx, do_flip ? 
height - 1 - srcy : srcy); + + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); + } + else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); + brw_copy_blit(brw_context(pipe), + do_flip, + dst->block.size, + (short) src->stride/src->block.size, src->buffer, src->offset, FALSE, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, + (short) width, (short) height, PIPE_LOGICOP_COPY); + } +} + + +static void +brw_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + if (0) { + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); + + pipe->screen->surface_unmap(pipe->screen, dst); + } + else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); + brw_fill_blit(brw_context(pipe), + dst->block.size, + (short) dst->stride/dst->block.size, + dst->buffer, dst->offset, FALSE, + (short) dstx, (short) dsty, + (short) width, (short) height, + value); + } +} + + +void +brw_init_surface_functions(struct brw_context *brw) +{ + brw->pipe.surface_copy = brw_surface_copy; + brw->pipe.surface_fill = brw_surface_fill; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c new file mode 100644 index 0000000000..cc0c665e02 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -0,0 +1,400 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +/* Code to layout images in a mipmap tree for i965. 
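Two remarks on brw_surface_copy() above. First, the third assertion compares dst->block.width with src->block.height, which reads like a typo for src->block.width. Second, the do_flip path reads the source rows bottom-up (negated stride, start row moved to the far end); the mapping it implements is simply a vertical mirror, sketched below as a hypothetical helper:

/* Sketch only: which source row of the copied rectangle ends up in
 * destination row i.  With do_flip the image is mirrored vertically.
 */
static unsigned copy_src_row(unsigned i, unsigned height, int do_flip)
{
   return do_flip ? height - 1 - i : i;
}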
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "brw_context.h" +#include "brw_tex_layout.h" + + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +#if 0 +unsigned intel_compressed_alignment(unsigned internalFormat) +{ + unsigned alignment = 4; + + switch (internalFormat) { + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + alignment = 8; + break; + + default: + break; + } + + return alignment; +} +#endif + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void intel_miptree_set_image_offset(struct brw_texture *tex, + unsigned level, + unsigned img, + unsigned x, unsigned y) +{ + struct pipe_texture *pt = &tex->base; + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = y * tex->stride + x * pt->block.size; +} + +static void intel_miptree_set_level_info(struct brw_texture *tex, + unsigned level, + unsigned nr_images, + unsigned x, unsigned y, + unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); + + tex->level_offset[level] = y * tex->stride + x * tex->base.block.size; + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + +static void i945_miptree_layout_2d(struct brw_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + const int align_x = 2, align_y = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + + tex->stride = align(pt->nblocksx[0] * pt->block.size, 4); + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap level. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap level out past the width of its parent. + */ + if (pt->last_level > 0) { + unsigned mip1_nblocksx + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); + + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; + } + + /* Pitch must be a whole number of dwords + */ + tex->stride = align(tex->stride, 64); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + intel_miptree_set_level_info(tex, level, 1, x, y, width, + height, 1); + + nblocksy = align(nblocksy, align_y); + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); + + /* Layout_below: step right after second mipmap level. 
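The placement rule used by i945_miptree_layout_2d() (level 1 goes below level 0, then the chain steps right and stacks the remaining levels) is easiest to see with a concrete case. The sketch below is not driver code; it just replays the same arithmetic for a 16x16 texture with 1-byte blocks and the align_x = 2, align_y = 4 constants used here:

#include <stdio.h>

static void example_i945_2d_layout(void)
{
   unsigned x = 0, y = 0, w = 16, h = 16, level, total = 0;

   for (level = 0; level < 5; level++) {
      unsigned hh = (h + 3) & ~3;            /* align(nblocksy, align_y) */
      printf("level %u: %ux%u at block (%u,%u)\n", level, w, h, x, y);
      if (y + hh > total)
         total = y + hh;
      if (level == 1)
         x += (w + 1) & ~1;                  /* step right after level 1 */
      else
         y += hh;
      w = w > 1 ? w >> 1 : 1;                /* minify() */
      h = h > 1 ? h >> 1 : 1;
   }
   /* Prints levels at (0,0), (0,16), (8,16), (8,20), (8,24); total = 28. */
   printf("total_nblocksy = %u\n", total);
}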
+ */ + if (level == 1) { + x += align(nblocksx, align_x); + } + else { + y += nblocksy; + } + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } +} + +static boolean brw_miptree_layout(struct brw_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + /* XXX: these vary depending on image format: + */ +/* int align_w = 4; */ + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_3D: { + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + unsigned level; + unsigned align_h = 2; + unsigned align_w = 4; + + tex->total_nblocksy = 0; + + tex->stride = align(pt->nblocksx[0], 4); + pack_y_pitch = align(pt->nblocksy[0], align_h); + + pack_x_pitch = tex->stride / pt->block.size; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; + int x = 0; + int y = 0; + uint q, j; + + intel_miptree_set_level_info(tex, level, nr_images, + 0, tex->total_nblocksy, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + intel_miptree_set_image_offset(tex, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_nblocksy += y; + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + + if (pt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = align(pack_y_pitch, align_h); + } + } + + } + break; + } + + default: + i945_miptree_layout_2d(tex); + break; + } +#if 0 + PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + pt->pitch, + pt->total_nblocksy, + pt->block.size, + pt->stride * pt->total_nblocksy ); +#endif + + return TRUE; +} + + +static struct pipe_texture * +brw_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; + struct brw_texture *tex = CALLOC_STRUCT(brw_texture); + + if (tex) { + tex->base = *templat; + tex->base.refcount = 1; + + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + + if (brw_miptree_layout(tex)) + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->stride * + tex->total_nblocksy); + + if (!tex->buffer) { + FREE(tex); + return NULL; + } + } + + return &tex->base; +} + + +static void +brw_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct pipe_winsys *ws = screen->winsys; + struct brw_texture *tex = (struct brw_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + winsys_buffer_reference(ws, &tex->buffer, NULL); + + for (i 
= 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + free(tex->image_offset[i]); + + free(tex); + } + *pt = NULL; +} + + +static struct pipe_surface * +brw_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct brw_texture *tex = (struct brw_texture *)pt; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = tex->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset += tex->image_offset[level][face]; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset += tex->image_offset[level][zslice]; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->format); + assert(ps->refcount); + winsys_buffer_reference(ws, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; + ps->offset = offset; + } + return ps; +} + + +void +brw_init_texture_functions(struct brw_context *brw) +{ +// brw->pipe.texture_update = brw_texture_update; +} + + +void +brw_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = brw_texture_create_screen; + screen->texture_release = brw_texture_release_screen; + screen->get_tex_surface = brw_get_tex_surface_screen; +} + diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h new file mode 100644 index 0000000000..a6b6ba8146 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -0,0 +1,44 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
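Putting the two offset tables together: intel_miptree_set_level_info() records each level's origin and intel_miptree_set_image_offset() records each cube face / depth slice within that level, both as y * stride + x * block_size, so the surface returned by brw_get_tex_surface_screen() starts at the sum of the two. A trivial sketch, assuming the brw_texture fields shown above:

static unsigned image_byte_offset(const struct brw_texture *tex,
                                  unsigned level, unsigned image)
{
   /* image is a cube face or 3D z-slice; for 1D/2D textures it is 0 and
    * image_offset[level][0] is 0, so this degenerates to the level origin.
    */
   return tex->level_offset[level] + tex->image_offset[level][image];
}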
+ + **********************************************************************/ + + +#ifndef BRW_TEX_LAYOUT_H +#define BRW_TEX_LAYOUT_H + + +struct brw_context; +struct pipe_screen; + + +extern void +brw_init_texture_functions(struct brw_context *brw); + +extern void +brw_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c new file mode 100644 index 0000000000..101a4367b9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_urb.c @@ -0,0 +1,186 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +//#include "brw_state.h" +#include "brw_batch.h" +#include "brw_defines.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/* XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct { + unsigned min_nr_entries; + unsigned preferred_nr_entries; + unsigned min_entry_size; + unsigned max_entry_size; +} limits[CS+1] = { + { 8, 32, 1, 5 }, /* vs */ + { 4, 8, 1, 5 }, /* gs */ + { 6, 8, 1, 5 }, /* clp */ + { 1, 8, 1, 12 }, /* sf */ + { 1, 4, 1, 32 } /* cs */ +}; + + +static boolean check_urb_layout( struct brw_context *brw ) +{ + brw->urb.vs_start = 0; + brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; + brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; + brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; + brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. 
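check_urb_layout() treats the URB as 256 register pairs split into five consecutive regions (VS, GS, CLIP, SF, CS) and simply checks that the running sum still fits. A worked illustration with the preferred entry counts and some assumed entry sizes (the real sizes come from the compiled programs and the curbe):

static unsigned example_urb_end(void)
{
   /* Assumed sizes for illustration: vsize = 4 (shared by VS/GS/CLIP entries),
    * sfsize = 2, csize = 2.
    */
   unsigned vsize = 4, sfsize = 2, csize = 2;
   unsigned vs_start   = 0;
   unsigned gs_start   = vs_start   + 32 * vsize;    /* 128 */
   unsigned clip_start = gs_start   +  8 * vsize;    /* 160 */
   unsigned sf_start   = clip_start +  8 * vsize;    /* 192 */
   unsigned cs_start   = sf_start   +  8 * sfsize;   /* 208 */
   return cs_start + 4 * csize;                      /* 216 <= 256: layout fits */
}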
+ */ +static void recalculate_urb_fence( struct brw_context *brw ) +{ + unsigned csize = brw->curbe.total_size; + unsigned vsize = brw->vs.prog_data->urb_entry_size; + unsigned sfsize = brw->sf.prog_data->urb_entry_size; + + if (csize < limits[CS].min_entry_size) + csize = limits[CS].min_entry_size; + + if (vsize < limits[VS].min_entry_size) + vsize = limits[VS].min_entry_size; + + if (sfsize < limits[SF].min_entry_size) + sfsize = limits[SF].min_entry_size; + + if (brw->urb.vsize < vsize || + brw->urb.sfsize < sfsize || + brw->urb.csize < csize || + (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || + brw->urb.sfsize > brw->urb.sfsize || + brw->urb.csize > brw->urb.csize))) { + + + brw->urb.csize = csize; + brw->urb.sfsize = sfsize; + brw->urb.vsize = vsize; + + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + + if (!check_urb_layout(brw)) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + + brw->urb.constrained = 1; + + if (!check_urb_layout(brw)) { + /* This is impossible, given the maximal sizes of urb + * entries and the values for minimum nr of entries + * provided above. + */ + debug_printf("couldn't calculate URB layout!\n"); + exit(1); + } + + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); + } + else + brw->urb.constrained = 0; + + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + brw->urb.vs_start, + brw->urb.gs_start, + brw->urb.clip_start, + brw->urb.sf_start, + brw->urb.cs_start, + 256); + + brw->state.dirty.brw |= BRW_NEW_URB_FENCE; + } +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { + .dirty = { + .brw = BRW_NEW_CURBE_OFFSETS, + .cache = (CACHE_NEW_VS_PROG | + CACHE_NEW_SF_PROG) + }, + .update = recalculate_urb_fence +}; + + + + + +void brw_upload_urb_fence(struct brw_context *brw) +{ + struct brw_urb_fence uf; + memset(&uf, 0, sizeof(uf)); + + uf.header.opcode = CMD_URB_FENCE; + uf.header.length = sizeof(uf)/4-2; + uf.header.vs_realloc = 1; + uf.header.gs_realloc = 1; + uf.header.clp_realloc = 1; + uf.header.sf_realloc = 1; + uf.header.vfe_realloc = 1; + uf.header.cs_realloc = 1; + + /* The ordering below is correct, not the layout in the + * instruction. + * + * There are 256 urb reg pairs in total. + */ + uf.bits0.vs_fence = brw->urb.gs_start; + uf.bits0.gs_fence = brw->urb.clip_start; + uf.bits0.clp_fence = brw->urb.sf_start; + uf.bits1.sf_fence = brw->urb.cs_start; + uf.bits1.cs_fence = 256; + + BRW_BATCH_STRUCT(brw, &uf); +} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c new file mode 100644 index 0000000000..42391d7c8c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.c @@ -0,0 +1,104 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
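In recalculate_urb_fence() above, the constrained branch of the trigger condition compares brw->urb.vsize, sfsize and csize against themselves, so it can never fire; it presumably meant to compare against the newly computed local sizes so that a constrained layout is retried once the requirements shrink. A sketch of the apparently intended test follows (an inference, not the committed code). Note also that each *_fence field written by brw_upload_urb_fence() holds the end of that unit's region, i.e. the start of the next one, which is what the "ordering below is correct" comment is pointing out.

static boolean urb_needs_new_fence(const struct brw_context *brw,
                                   unsigned vsize, unsigned sfsize, unsigned csize)
{
   if (brw->urb.vsize < vsize || brw->urb.sfsize < sfsize || brw->urb.csize < csize)
      return TRUE;                     /* an entry size grew: must re-layout */

   if (brw->urb.constrained && (brw->urb.vsize > vsize ||
                                brw->urb.sfsize > sfsize ||
                                brw->urb.csize > csize))
      return TRUE;                     /* constrained and something shrank: retry */

   return FALSE;
}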
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_util.h" +#include "brw_defines.h" + +#include "pipe/p_defines.h" + +unsigned brw_count_bits( unsigned val ) +{ + unsigned i; + for (i = 0; val ; val >>= 1) + if (val & 1) + i++; + return i; +} + + +unsigned brw_translate_blend_equation( int mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +unsigned brw_translate_blend_factor( int factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h new file mode 100644 index 0000000000..d60e5934db --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.h @@ -0,0 +1,43 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
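The two translate helpers above map Gallium's PIPE_BLEND* tokens onto the encodings expected by the cc5/cc6 dwords of brw_cc_unit_state shown earlier. A hypothetical illustration of where the results end up (not the driver's actual state-emission path):

static void example_fill_blend(struct brw_cc_unit_state *cc,
                               unsigned pipe_eq, unsigned pipe_src, unsigned pipe_dst)
{
   cc->cc3.blend_enable      = 1;
   cc->cc6.blend_function    = brw_translate_blend_equation(pipe_eq);
   cc->cc6.src_blend_factor  = brw_translate_blend_factor(pipe_src);
   cc->cc6.dest_blend_factor = brw_translate_blend_factor(pipe_dst);
}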
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "pipe/p_state.h" + +extern unsigned brw_count_bits( unsigned val ); +extern unsigned brw_translate_blend_factor( int factor ); +extern unsigned brw_translate_blend_equation( int mode ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c new file mode 100644 index 0000000000..92327e896d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -0,0 +1,120 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" + + +static void do_vs_prog( struct brw_context *brw, + const struct brw_vertex_program *vp, + struct brw_vs_prog_key *key ) +{ + unsigned program_size; + const unsigned *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(&c.func); + c.vp = vp; + + c.prog_data.outputs_written = vp->info.num_outputs; + c.prog_data.inputs_read = vp->info.num_inputs; + +#if 0 + if (c.key.copy_edgeflag) { + c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; + c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; + } +#endif + + /* Emit GEN4 code. + */ + brw_vs_emit(&c); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* + */ + brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->vs.prog_data); +} + + +static void brw_upload_vs_prog( struct brw_context *brw ) +{ + struct brw_vs_prog_key key; + const struct brw_vertex_program *vp = brw->attribs.VertexProgram; + + assert(vp); + + memset(&key, 0, sizeof(key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key.program_string_id = vp->id; + key.nr_userclip = brw->attribs.Clip.nr; + key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_VS_PROG], + &key, sizeof(key), + &brw->vs.prog_data, + &brw->vs.prog_gs_offset)) + return; + + do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .brw = BRW_NEW_VS, + .cache = 0 + }, + .update = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h new file mode 100644 index 0000000000..070f9dfcae --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -0,0 +1,82 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +struct brw_vs_prog_key { + unsigned program_string_id; + unsigned nr_userclip:4; + unsigned copy_edgeflag:1; + unsigned know_w_is_one:1; + unsigned pad:26; +}; + + +struct brw_vs_compile { + struct brw_compile func; + struct brw_vs_prog_key key; + struct brw_vs_prog_data prog_data; + + const struct brw_vertex_program *vp; + + unsigned nr_inputs; + + unsigned first_output; + unsigned nr_outputs; + + unsigned first_tmp; + unsigned last_tmp; + + struct brw_reg r0; + struct brw_reg r1; + struct brw_reg regs[12][128]; + struct brw_reg tmp; + struct brw_reg stack; + + struct { + boolean used_in_src; + struct brw_reg reg; + } output_regs[128]; + + struct brw_reg userplane[6]; + +}; + +void brw_vs_emit( struct brw_vs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c new file mode 100644 index 0000000000..34dbc0624d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -0,0 +1,1330 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_vs.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +struct brw_prog_info { + unsigned num_temps; + unsigned num_addrs; + unsigned num_consts; + + unsigned writes_psize; + + unsigned pos_idx; + unsigned result_edge_idx; + unsigned edge_flag_idx; + unsigned psize_idx; +}; + +/* Do things as simply as possible. Allocate and populate all regs + * ahead of time. 
+ */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c, + struct brw_prog_info *info ) +{ + unsigned i, reg = 0, mrf; + unsigned nr_params; + + /* r0 -- reserved as usual + */ + c->r0 = brw_vec8_grf(reg, 0); reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + } + + /* Deal with curbe alignment: + */ + reg += ((6+c->key.nr_userclip+3)/4)*2; + } + + /* Vertex program parameters from curbe: + */ + nr_params = c->prog_data.max_const; + for (i = 0; i < nr_params; i++) { + c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params+1)/2; + c->prog_data.curb_read_length = reg - 1; + + + + /* Allocate input regs: + */ + c->nr_inputs = c->vp->info.num_inputs; + for (i = 0; i < c->nr_inputs; i++) { + c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + + + /* Allocate outputs: TODO: could organize the non-position outputs + * to go straight into message regs. + */ + c->nr_outputs = 0; + c->first_output = reg; + mrf = 4; + for (i = 0; i < c->vp->info.num_outputs; i++) { + c->nr_outputs++; +#if 0 + if (i == VERT_RESULT_HPOS) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#else + /*treat pos differently for now */ + if (i == info->pos_idx) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#endif + } + + /* Allocate program temporaries: + */ + for (i = 0; i < info->num_temps; i++) { + c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* Address reg(s). Don't try to use the internal address reg until + * deref time. + */ + for (i = 0; i < info->num_addrs; i++) { + c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + reg, + 0, + BRW_REGISTER_TYPE_D, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); + reg++; + } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + + + /* Some opcodes need an internal temporary: + */ + c->first_tmp = reg; + c->last_tmp = reg; /* for allocation purposes */ + + /* Each input reg holds data from two vertices. 
The + * urb_read_length is the number of registers read from *each* + * vertex urb, so is half the amount: + */ + c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + + c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.total_grf = reg; +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +static void unalias1( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg )) +{ + if (dst.file == arg0.file && dst.nr == arg0.nr) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0); + } +} + +static void unalias2( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) || + (dst.file == arg1.file && dst.nr == arg1.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0, arg1); + } +} + +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned cond) +{ + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_pop_insn_state(p); +} + +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} + +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} +static void emit_slt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} + +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); +} + +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} + +static void emit_sge( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_max( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg1, arg0); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), 
BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void emit_math1( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + unsigned precision) +{ + /* There are various odd behaviours with SEND on the simulator. In + * addition there are documented issues with the fact that the GEN4 + * processor doesn't do dependency control properly on SEND + * results. So, on balance, this kludge to get around failures + * with writemasked math results looks like it might be necessary + * whether that turns out to be a simulator bug or not: + */ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + +static void emit_math2( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_message_reg(3), arg1); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +static void emit_exp_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + + /* tmp_d = floor(arg0.x) */ + brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + + /* result[0] = 2.0 ^ tmp */ + + /* Adjust exponent for floating point: + * exp += 127 + */ + brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); + + /* Install exponent and sign. + * Excess drops off the edge: + */ + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), + tmp_d, brw_imm_d(23)); + + release_tmp(c, tmp); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { + /* result[1] = arg0.x - floor(arg0.x) */ + brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { + /* As with the LOG instruction, we might be better off just + * doing a taylor expansion here, seeing as we have to do all + * the prep work. 
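The result.x path of emit_exp_noalias() builds 2^floor(x) directly in the destination's bit pattern: RNDD produces the integer exponent, ADD biases it by 127, and SHL by 23 moves it into the float exponent field. The same trick in plain C, for reference (illustration only, valid for the normal-number range):

static float exp2_int(int n)
{
   union { unsigned u; float f; } v;
   v.u = (unsigned)(n + 127) << 23;   /* biased exponent, zero mantissa */
   return v.f;                        /* exp2_int(2) == 4.0f, exp2_int(-1) == 0.5f */
}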
+    *
+    * If mathbox partial precision is too low, consider also:
+    * result[3] = result[0] * EXP(result[1])
+    */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_EXP,
+                 brw_writemask(dst, TGSI_WRITEMASK_Z),
+                 brw_swizzle1(arg0, 0),
+                 BRW_MATH_PRECISION_PARTIAL);
+   }
+
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+   boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+                       dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp) {
+      tmp = get_tmp(c);
+      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   }
+
+   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
+    * according to spec:
+    *
+    * These almost look like they could be joined up, but not really
+    * practical:
+    *
+    * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
+    * result[1].i = (x.i & ((1<<23)-1)) + (127<<23)
+    */
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
+      brw_AND(p,
+              brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1U<<31)-1));
+
+      brw_SHR(p,
+              brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+              tmp_ud,
+              brw_imm_ud(23));
+
+      brw_ADD(p,
+              brw_writemask(tmp, TGSI_WRITEMASK_X),
+              retype(tmp_ud, BRW_REGISTER_TYPE_D),      /* does it matter? */
+              brw_imm_d(-127));
+   }
+
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
+      brw_AND(p,
+              brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1<<23)-1));
+
+      brw_OR(p,
+             brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+             tmp_ud,
+             brw_imm_ud(127<<23));
+   }
+
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
+      /* result[2] = result[0] + LOG2(result[1]); */
+
+      /* Why bother?  The above is just a hint how to do this with a
+       * taylor series.  Maybe we *should* use a taylor series as by
+       * the time all the above has been done it's almost certainly
+       * quicker than calling the mathbox, even with low precision.
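+       * (Worked example: for arg0.x = 12.0 = 1.5 * 2^3 the X/Y channels
+       *  above hold 3 and 1.5, so whichever route is taken result[2]
+       *  should come out as 3 + log2(1.5) ~= 3.585, i.e. log2(12).)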
+ * + * Options are: + * - result[0] + mathbox.LOG2(result[1]) + * - mathbox.LOG2(arg0.x) + * - result[0] + inline_taylor_approx(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_LOG, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 1), + BRW_MATH_PRECISION_FULL); + + brw_ADD(p, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(tmp, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); + } + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + + +/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 + */ +static void emit_dst_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1) +{ + struct brw_compile *p = &c->func; + + /* There must be a better way to do this: + */ + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); +} + +static void emit_xpd( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg t, + struct brw_reg u) +{ + brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); + brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); +} + + + +static void emit_lit_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); + + /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_8); + { + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + emit_math2(c, + BRW_MATH_FUNCTION_POW, + brw_writemask(dst, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(arg0, 3), + BRW_MATH_PRECISION_PARTIAL); + } + + brw_ENDIF(p, if_insn); +} + + + + + +/* TODO: relative addressing! 
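+ * (The deref() helper below sketches how an ADDRESS-relative read would
+ *  go through a0 and brw_vec4_indirect(); the RelAddr call sites that
+ *  would use it are still #if 0'd out.)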
+ */ +static struct brw_reg get_reg( struct brw_vs_compile *c, + unsigned file, + unsigned index ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + case TGSI_FILE_CONSTANT: + assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; + case TGSI_FILE_IMMEDIATE: + assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index]; + case TGSI_FILE_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case TGSI_FILE_NULL: /* undef values */ + return brw_null_reg(); + + default: + assert(0); + return brw_null_reg(); + } +} + + + +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + int offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. + */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + return vec8(tmp); +} + + +static void emit_arl( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_RNDD(p, tmp, arg0); + brw_MUL(p, dst, tmp, brw_imm_d(16)); + + if (need_tmp) + release_tmp(c, tmp); +} + + +/* Will return mangled results for SWZ op. The emit_swz() function + * ignores this result and recalculates taking extended swizzles into + * account. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + struct tgsi_src_register *src ) +{ + struct brw_reg reg; + + if (src->File == TGSI_FILE_NULL) + return brw_null_reg(); + +#if 0 + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); + else +#endif + reg = get_reg(c, src->File, src->Index); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, + src->SwizzleY, + src->SwizzleZ, + src->SwizzleW); + + /* Note this is ok for non-swizzle instructions: + */ + reg.negate = src->Negate ? 
1 : 0; + + return reg; +} + + +static struct brw_reg get_dst( struct brw_vs_compile *c, + const struct tgsi_dst_register *dst ) +{ + struct brw_reg reg = get_reg(c, dst->File, dst->Index); + + reg.dw1.bits.writemask = dst->WriteMask; + + return reg; +} + + + + +static void emit_swz( struct brw_vs_compile *c, + struct brw_reg dst, + struct tgsi_src_register src ) +{ + struct brw_compile *p = &c->func; + unsigned zeros_mask = 0; + unsigned ones_mask = 0; + unsigned src_mask = 0; + ubyte src_swz[4]; + boolean need_tmp = (src.Negate && + dst.file != BRW_GENERAL_REGISTER_FILE); + struct brw_reg tmp = dst; + unsigned i; + + if (need_tmp) + tmp = get_tmp(c); + + for (i = 0; i < 4; i++) { + if (dst.dw1.bits.writemask & (1<<i)) { + ubyte s = 0; + switch(i) { + case 0: + s = src.SwizzleX; + break; + s = src.SwizzleY; + case 1: + break; + s = src.SwizzleZ; + case 2: + break; + s = src.SwizzleW; + case 3: + break; + } + switch (s) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + src_mask |= 1<<i; + src_swz[i] = s; + break; + case TGSI_EXTSWIZZLE_ZERO: + zeros_mask |= 1<<i; + break; + case TGSI_EXTSWIZZLE_ONE: + ones_mask |= 1<<i; + break; + } + } + } + + /* Do src first, in case dst aliases src: + */ + if (src_mask) { + struct brw_reg arg0; + +#if 0 + if (src.RelAddr) + arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + else +#endif + arg0 = get_reg(c, src.File, src.Index); + + arg0 = brw_swizzle(arg0, + src_swz[0], src_swz[1], + src_swz[2], src_swz[3]); + + brw_MOV(p, brw_writemask(tmp, src_mask), arg0); + } + + if (zeros_mask) + brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); + + if (ones_mask) + brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); + + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +/* Post-vertex-program processing. Send the results to the URB. + */ +static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info) +{ + struct brw_compile *p = &c->func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx]; + struct brw_reg ndc; + + if (c->key.copy_edgeflag) { + brw_MOV(p, + get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx), + get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx)); + } + + + /* Build ndc coords? TODO: Shortcircuit when w is known to be one. 
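+    * (i.e. ndc.xyz = pos.xyz * (1/pos.w), with 1/w coming from the INV
+    *  mathbox call below; when the key already guarantees w == 1.0 the
+    *  raw position is used instead.)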
+ */ + if (!c->key.know_w_is_one) { + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); + } + else { + ndc = pos; + } + + /* This includes the workaround for -ve rhw, so is no longer an + * optional step: + */ + if (info->writes_psize || + c->key.nr_userclip || + !c->key.know_w_is_one) + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + unsigned i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (info->writes_psize) { + struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; + brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, + brw_imm_ud(0x7ff<<8)); + } + + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. + */ + if (!c->key.know_w_is_one) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } + + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + c->nr_outputs + 3, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + +} + +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + struct tgsi_parse_context parse; + const struct tgsi_token *tokens = c->vp->program.tokens; + tgsi_parse_init(&parse, tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { +#if 0 + struct brw_instruction *brw_inst1, *brw_inst2; + const struct tgsi_full_instruction *inst1, *inst2; + int offset; + inst1 = &parse.FullToken.FullInstruction; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case TGSI_OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } +#endif + } + } + 
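+   /* The CAL/BRA/END offset fixups are all #if 0'd above, so for now
+    * this pass just re-walks the token stream without patching anything.
+    */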
tgsi_parse_free(&parse); +} + +static void process_declaration(const struct tgsi_full_declaration *decl, + struct brw_prog_info *info) +{ + int first = decl->DeclarationRange.First; + int last = decl->DeclarationRange.Last; + + switch(decl->Declaration.File) { + case TGSI_FILE_CONSTANT: + info->num_consts += last - first + 1; + break; + case TGSI_FILE_INPUT: { + } + break; + case TGSI_FILE_OUTPUT: { + assert(last == first); /* for now */ + if (decl->Declaration.Semantic) { + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: { + info->pos_idx = first; + } + break; + case TGSI_SEMANTIC_COLOR: + break; + case TGSI_SEMANTIC_BCOLOR: + break; + case TGSI_SEMANTIC_FOG: + break; + case TGSI_SEMANTIC_PSIZE: { + info->writes_psize = TRUE; + info->psize_idx = first; + } + break; + case TGSI_SEMANTIC_GENERIC: + break; + } + } + } + break; + case TGSI_FILE_TEMPORARY: { + info->num_temps += (last - first) + 1; + } + break; + case TGSI_FILE_SAMPLER: { + } + break; + case TGSI_FILE_ADDRESS: { + info->num_addrs += (last - first) + 1; + } + break; + case TGSI_FILE_IMMEDIATE: { + } + break; + case TGSI_FILE_NULL: { + } + break; + } +} + +static void process_instruction(struct brw_vs_compile *c, + struct tgsi_full_instruction *inst, + struct brw_prog_info *info) +{ + struct brw_reg args[3], dst; + struct brw_compile *p = &c->func; + /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ + unsigned i; + unsigned index; + unsigned file; + /*FIXME: might not be the only one*/ + const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; + /* + struct brw_instruction *if_inst[MAX_IFSN]; + unsigned insn, if_insn = 0; + */ + + for (i = 0; i < 3; i++) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + index = src->SrcRegister.Index; + file = src->SrcRegister.File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, &src->SrcRegister); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. 
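+ * (Outputs get special treatment: when an OUTPUT register is also read
+ *  as a source, the staging GRF in c->output_regs[] stands in for both
+ *  operands here, and the MOV at the bottom of this function copies the
+ *  result back to the real output. Think of a hypothetical
+ *  "ADD OUT[1], OUT[1], IN[0]".)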
+ */ + index = dst_reg->Index; + file = dst_reg->File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, dst_reg); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case TGSI_OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case TGSI_OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case TGSI_OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case TGSI_OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case TGSI_OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case TGSI_OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: +#if 0 + /* The args[0] value can't be used here as it won't have + * correctly encoded the full swizzle: + */ + emit_swz(c, dst, inst->SrcReg[0] ); +#endif + brw_MOV(p, dst, args[0]); + break; + case TGSI_OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; +#if 0 + case TGSI_OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case TGSI_OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case TGSI_OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), 
brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; +#endif + case TGSI_OPCODE_RET: +#if 0 + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); +#else + /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/ +#endif + break; + case TGSI_OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + default: + debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); + break; + } + + if (dst_reg->File == TGSI_FILE_OUTPUT + && dst_reg->Index != info->pos_idx + && c->output_regs[dst_reg->Index].used_in_src) + brw_MOV(p, get_dst(c, dst_reg), dst); + + release_tmps(c); +} + +/* Emit the fragment program instructions here. + */ +void brw_vs_emit(struct brw_vs_compile *c) +{ +#define MAX_IFSN 32 + struct brw_compile *p = &c->func; + struct brw_instruction *end_inst; + struct tgsi_parse_context parse; + struct brw_indirect stack_index = brw_indirect(0, 0); + const struct tgsi_token *tokens = c->vp->program.tokens; + struct brw_prog_info prog_info; + unsigned allocated_registers = 0; + memset(&prog_info, 0, sizeof(struct brw_prog_info)); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + tgsi_parse_init(&parse, tokens); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + unsigned i; + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: { + const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + for (i = 0; i < 3; ++i) { + const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; + unsigned index = src->Index; + unsigned file = src->File; + if (file == TGSI_FILE_OUTPUT) + c->output_regs[index].used_in_src = TRUE; + } + } + break; + default: + /* nothing */ + break; + } + } + tgsi_parse_free(&parse); + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: { + struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + process_declaration(decl, &prog_info); + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: { + struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; + /*assert(imm->Immediate.Size == 4);*/ + c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float; + c->prog_data.num_imm++; + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: { + struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + if (!allocated_registers) { + /* first instruction (declerations finished). 
+ * now that we know what vars are being used allocate + * registers for them.*/ + c->prog_data.num_consts = prog_info.num_consts; + c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; + brw_vs_alloc_regs(c, &prog_info); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + brw_set_access_mode(p, BRW_ALIGN_16); + allocated_registers = 1; + } + process_instruction(c, inst, &prog_info); + } + break; + } + } + + end_inst = &p->store[p->nr_insn]; + emit_vertex_write(c, &prog_info); + post_vs_emit(c, end_inst); + tgsi_parse_free(&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c new file mode 100644 index 0000000000..1eaff87892 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -0,0 +1,103 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +static void upload_vs_unit( struct brw_context *brw ) +{ + struct brw_vs_unit_state vs; + + memset(&vs, 0, sizeof(vs)); + + /* CACHE_NEW_VS_PROG */ + vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; + vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + + + /* BRW_NEW_URB_FENCE */ + vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; + vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + vs.thread4.max_threads = MIN2( + MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), + 15); + + + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_offset = 0; + + /* No samplers for ARB_vp programs: + */ + vs.vs5.sampler_count = 0; + + if (BRW_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + vs.vs6.vs_enable = 1; + + brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); +} + + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_vs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h new file mode 100644 index 0000000000..ec1e400418 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i965simple requires any window system + * hosting it to implement. This is the only include file in i965simple + * which is public. + * + */ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; +struct pipe_screen; + + +/* The pipe driver currently understands the following chipsets: + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + + +/* These are the names of all the state caches managed by the driver. + * + * When data is uploaded to a buffer with buffer_subdata, we use the + * special version of that function below so that information about + * what type of data this is can be passed to the winsys backend. + * That in turn allows the correct flags to be set in the aub file + * dump to allow human-readable file dumps later on. + */ + +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE + +/** + * Additional winsys interface for i965simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct brw_winsys { + + void (*destroy)(struct brw_winsys *); + + /** + * Reserve space on batch buffer. + * + * Returns a null pointer if there is insufficient space in the batch buffer + * to hold the requested number of dwords and relocations. + * + * The number of dwords should also include the number of relocations. + */ + unsigned *(*batch_start)(struct brw_winsys *sws, + unsigned dwords, + unsigned relocs); + + void (*batch_dword)(struct brw_winsys *sws, + unsigned dword); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. 
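+ * (In this header those flags are spelled BRW_BUFFER_ACCESS_WRITE and
+ *  BRW_BUFFER_ACCESS_READ, defined near the bottom of the file. A
+ *  typical emit sequence would be batch_start() to reserve space, a run
+ *  of batch_dword() calls with batch_reloc() wherever a buffer address
+ *  must be patched in; brw_batchbuffer_data() below is a minimal
+ *  example.)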
+ */ + void (*batch_reloc)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta); + + + /* Not used yet, but really want this: + */ + void (*batch_end)( struct brw_winsys *sws ); + + /** + * Flush the batch buffer. + * + * Fence argument must point to NULL or to a previous fence, and the caller + * must call fence_reference when done with the fence. + */ + void (*batch_flush)(struct brw_winsys *sws, + struct pipe_fence_handle **fence); + + + /* A version of buffer_subdata that includes information for the + * simulator: + */ + void (*buffer_subdata_typed)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type); + + + /* A cheat so we don't have to think about relocations in a couple + * of places yet: + */ + unsigned (*get_buffer_offset)( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned flags ); + +}; + +#define BRW_BUFFER_ACCESS_WRITE 0x1 +#define BRW_BUFFER_ACCESS_READ 0x2 + +#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *brw_create(struct pipe_screen *, + struct brw_winsys *, + unsigned pci_id); + +static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, + const void *data, + unsigned bytes) +{ + static const unsigned incr = sizeof(unsigned); + uint i; + const unsigned *udata = (const unsigned*)(data); + unsigned size = bytes/incr; + + winsys->batch_start(winsys, size, 0); + for (i = 0; i < size; ++i) { + winsys->batch_dword(winsys, udata[i]); + } + winsys->batch_end(winsys); + + return (i == size); +} +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c new file mode 100644 index 0000000000..8de565b96c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -0,0 +1,209 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_eu.h" +#include "brw_state.h" +#include "util/u_memory.h" + + + +static void do_wm_prog( struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); + const unsigned *program; + unsigned program_size; + + c->key = *key; + c->fp = fp; + + c->delta_xy[0] = brw_null_reg(); + c->delta_xy[1] = brw_null_reg(); + c->pixel_xy[0] = brw_null_reg(); + c->pixel_xy[1] = brw_null_reg(); + c->pixel_w = brw_null_reg(); + + + debug_printf("XXXXXXXX FP\n"); + + brw_wm_glsl_emit(c); + + /* get the program + */ + program = brw_get_program(&c->func, &program_size); + + /* + */ + brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], + &c->key, + sizeof(c->key), + program, + program_size, + &c->prog_data, + &brw->wm.prog_data ); + + FREE(c); +} + + + +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct brw_fragment_program *fp = + (struct brw_fragment_program *)brw->attribs.FragmentProgram; + unsigned lookup = 0; + unsigned line_aa; + + memset(key, 0, sizeof(*key)); + + /* Build the index for table lookup + */ + /* BRW_NEW_DEPTH_STENCIL */ + if (fp->info.uses_kill || + brw->attribs.DepthStencil->alpha.enabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->info.writes_z) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + if (brw->attribs.DepthStencil->depth.enabled) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->depth.enabled && + brw->attribs.DepthStencil->depth.writemask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + + /* XXX: when should this be disabled? 
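+ * (Presumably whenever the shader itself can change the depth/stencil
+ *  outcome before the test, i.e. the KIL/alpha-test and computes-depth
+ *  cases flagged above; brw_wm_lookup_iz() is left to sort that out
+ *  from these bits.)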
+ */ + if (1) + lookup |= IZ_EARLY_DEPTH_TEST_BIT; + + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (brw->attribs.Raster->line_smooth) { + if (brw->reduced_primitive == PIPE_PRIM_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || + (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) + line_aa = AA_ALWAYS; + } + else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) + line_aa = AA_ALWAYS; + } + } + } + + brw_wm_lookup_iz(line_aa, + lookup, + key); + + +#if 0 + /* BRW_NEW_SAMPLER + * + * Not doing any of this at the moment: + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; + + if (unit) { + + if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + key->shadowtex_mask |= 1<<i; + } + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + key->yuvtex_mask |= 1<<i; + } + } +#endif + + + /* Extra info: + */ + key->program_string_id = fp->id; + +} + + +static void brw_upload_wm_prog( struct brw_context *brw ) +{ + struct brw_wm_prog_key key; + struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->attribs.FragmentProgram; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key. + */ + if (brw_search_cache(&brw->cache[BRW_WM_PROG], + &key, sizeof(key), + &brw->wm.prog_data, + &brw->wm.prog_gs_offset)) + return; + + do_wm_prog(brw, fp, &key); +} + + +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .brw = (BRW_NEW_FS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .update = brw_upload_wm_prog +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h new file mode 100644 index 0000000000..b29c4393f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -0,0 +1,142 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "brw_context.h" +#include "brw_eu.h" + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_EARLY_DEPTH_TEST_BIT 0x40 +#define IZ_BIT_MAX 0x80 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + unsigned source_depth_reg:3; + unsigned aa_dest_stencil_reg:3; + unsigned dest_depth_reg:3; + unsigned nr_depth_regs:3; + unsigned shadowtex_mask:8; + unsigned computes_depth:1; /* could be derived from program string */ + unsigned source_depth_to_render_target:1; + unsigned runtime_check_aads_emit:1; + + unsigned yuvtex_mask:8; + + unsigned program_string_id; +}; + + + + + +#define PROGRAM_INTERNAL_PARAM +#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS + +#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS) + +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; /* result */ + + struct brw_fragment_program *fp; + + unsigned grf_limit; + unsigned max_wm_grf; + + + struct brw_reg pixel_xy[2]; + struct brw_reg delta_xy[2]; + struct brw_reg pixel_w; + + + struct brw_reg wm_regs[8][32][4]; + + struct brw_reg payload_depth[4]; + struct brw_reg payload_coef[16]; + + struct brw_reg emit_mask_reg; + + struct brw_instruction *if_inst[MAX_IFSN]; + int if_insn; + + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + int loop_insn; + + struct brw_instruction *inst0; + struct brw_instruction *inst1; + + struct brw_reg stack; + struct brw_indirect stack_index; + + unsigned reg_index; + + unsigned tmp_start; + unsigned tmp_index; +}; + + + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ); + +void brw_wm_glsl_emit(struct brw_wm_compile *c); +void brw_wm_emit_decls(struct brw_wm_compile *c); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c new file mode 100644 index 0000000000..d50e66f613 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -0,0 +1,392 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + + +static 
int is_null( struct brw_reg reg ) +{ + return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_xy[0])) { + + struct brw_compile *p = &c->func; + struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + brw_ADD(p, + c->pixel_xy[0], + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + + brw_ADD(p, + c->pixel_xy[1], + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->delta_xy[0])) { + struct brw_compile *p = &c->func; + struct brw_reg r1 = brw_vec1_grf(1, 0); + + emit_pixel_xy(c); + + c->delta_xy[0] = alloc_tmp(c); + c->delta_xy[1] = alloc_tmp(c); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + brw_ADD(p, + c->delta_xy[0], + retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + + brw_ADD(p, + c->delta_xy[1], + retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_w)) { + struct brw_compile *p = &c->func; + + struct brw_reg interp_wpos = c->coef_wpos; + + c->pixel_w = alloc_tmp(c); + + emit_delta_xy(c); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); + brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + + /* Calc w */ + brw_math_16( p, + c->pixel_w, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_linterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + emit_delta_xy(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + } + } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + get_delta_xy(c); + get_pixel_w(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = 
brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i); + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + brw_MUL(p, dst, dst, c->pixel_w); + } + } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct tgsi_full_src_register deltas = get_delta_xy(c); + struct tgsi_full_src_register arg2; + unsigned opcode; + + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); +} +#endif + + + + +/* Perform register allocation: + * + * -- r0??? + * -- passthrough depth regs (and stencil/aa??) + * -- curbe ?? + * -- inputs (coefficients) + * + * Use a totally static register allocation. This will perform poorly + * but is an easy way to get started (again). + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + int nr_curbe_regs = 0; + + /* R0, then some depth related regs: + */ + for (i = 0; i < c->key.nr_depth_regs; i++) { + c->payload_depth[i] = brw_vec8_grf(i*2, 0); + c->reg_index += 2; + } + + + /* Then a copy of our part of the CURBE entry: + */ + { + int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; + int index = 0; + + /* XXX number of constants, or highest numbered constant? */ + assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); + + c->prog_data.max_const = 4*nr_constants; + for (i = 0; i < nr_constants; i++) { + for (j = 0; j < 4; j++, index++) + c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, + index%8); + } + + nr_curbe_regs = 2*((4*nr_constants+15)/16); + c->reg_index += nr_curbe_regs; + } + + /* Adjust for parameter coefficients for position, which are + * currently always provided. + */ +// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + + /* Next we receive the plane coefficients for parameter + * interpolation: + */ + assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { + c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; + c->prog_data.curb_read_length = nr_curbe_regs; + + /* That's the end of the payload, now we can start allocating registers. + */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; + + /* Now allocate room for the interpolated inputs and staging + * registers for the outputs: + */ + /* XXX do we want to loop over the _number_ of inputs/outputs or loop + * to the highest input/output index that's used? + * Probably the same, actually. 
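+ * (Either way, the static layout that falls out of this function is
+ *  roughly: depth payload regs, the CURBE constant copy, position
+ *  coefficients, one pair of plane-coefficient regs per input, the
+ *  emit-mask and stack regs, then 4 GRFs per interpolated input and
+ *  4 per output, with temporaries starting at tmp_start.)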
+ */ + assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); + assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + /* Beyond this we should only need registers for internal temporaries: + */ + c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader. A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + int done = 0; + + prealloc_reg(c); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned first = decl->DeclarationRange.First; + unsigned last = decl->DeclarationRange.Last; + unsigned mask = decl->Declaration.UsageMask; /* ? */ + unsigned i; + + if (decl->Declaration.File != TGSI_FILE_INPUT) + break; + + for( i = first; i <= last; i++ ) { + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + emit_cinterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_linterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + //emit_pinterp(c, i, mask); + emit_linterp(c, i, mask); + break; + } + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + + release_tmps(c); +} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c new file mode 100644 index 0000000000..ab6410aa60 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -0,0 +1,1076 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + + + +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; + int i; + for (i = 0; i < 4; i++) + if (dst.WriteMask & (1<<i)) + break; + return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component ) +{ + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_SAMPLER: + /* Should never get here: + */ + assert (0); + return brw_null_reg(); + + case TGSI_FILE_IMMEDIATE: + /* These need a different path: + */ + assert(0); + return brw_null_reg(); + + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + return c->wm_regs[file][index][component]; + + default: + assert(0); + return brw_null_reg(); + } +} + + +static struct brw_reg 
get_dst_reg(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + int component) +{ + return get_reg(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + component); +} + +static int get_swz( struct tgsi_src_register src, int index ) +{ + switch (index & 3) { + case 0: return src.SwizzleX; + case 1: return src.SwizzleY; + case 2: return src.SwizzleZ; + case 3: return src.SwizzleW; + default: return 0; + } +} + +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) +{ + switch (index & 3) { + case 0: return src.ExtSwizzleX; + case 1: return src.ExtSwizzleY; + case 2: return src.ExtSwizzleZ; + case 3: return src.ExtSwizzleW; + default: return 0; + } +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct tgsi_full_src_register *src, + int index) +{ + struct brw_reg reg; + int component = index; + int neg = 0; + int abs = 0; + + if (src->SrcRegister.Negate) + neg = 1; + + component = get_swz(src->SrcRegister, component); + + /* Yes, there are multiple negates: + */ + switch (component & 3) { + case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; + case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; + case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; + case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; + } + + /* And multiple swizzles, fun isn't it: + */ + component = get_ext_swz(src->SrcRegisterExtSwz, component); + + /* Not handling indirect lookups yet: + */ + assert(src->SrcRegister.Indirect == 0); + + /* Don't know what dimension means: + */ + assert(src->SrcRegister.Dimension == 0); + + /* Will never handle any of this stuff: + */ + assert(src->SrcRegisterExtMod.Complement == 0); + assert(src->SrcRegisterExtMod.Bias == 0); + assert(src->SrcRegisterExtMod.Scale2X == 0); + + if (src->SrcRegisterExtMod.Absolute) + abs = 1; + + /* Another negate! This is a post-absolute negate, which we + * can't do. Need to clean the crap out of tgsi somehow. + */ + assert(src->SrcRegisterExtMod.Negate == 0); + + switch( component ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + reg = get_reg(c, + src->SrcRegister.File, + src->SrcRegister.Index, + component ); + + if (neg) + reg = negate(reg); + + if (abs) + reg = brw_abs(reg); + + break; + + /* XXX: this won't really work in the general case, but we know + * that the extended swizzle is only allowed in the SWZ + * instruction (right??), in which case using an immediate + * directly will work. 
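+    * (e.g. a SWZ whose extended swizzle is "x, y, 0, 1" reads the x/y
+    *  channels normally and feeds the z/w channels straight from the
+    *  brw_imm_f(0) / brw_imm_f(1.0) immediates below; a negate on the
+    *  ONE component becomes -1.0.)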
+ */ + case TGSI_EXTSWIZZLE_ZERO: + reg = brw_imm_f(0); + break; + + case TGSI_EXTSWIZZLE_ONE: + if (neg && !abs) + reg = brw_imm_f(-1.0); + else + reg = brw_imm_f(1.0); + break; + + default: + assert(0); + break; + } + + + return reg; +} + +static void emit_abs( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_MOV(p, dst, brw_abs(src)); /* NOTE */ + } + } + brw_set_saturate(p, 0); +} + + +static void emit_xpd(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + for (i = 0; i < 4; i++) { + unsigned i2 = (i+2)%3; + unsigned i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + ((inst->Instruction.Saturate != TGSI_SAT_NONE) + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + + +static void emit_alu2(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_alu2(p, opcode, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + + +static void emit_alu1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_alu1(p, opcode, dst, src0); + } + } + if (inst->Instruction.Saturate != TGSI_SAT_NONE) + brw_set_saturate(p, 0); +} + + +static void emit_max(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->Instruction.Saturate != TGSI_SAT_NONE + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned cond) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); +} + + +static void emit_ddx(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + nr = src0.nr; + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +/* TODO + BIAS on SIMD8 not workind yet... 
+*/ +static void emit_txb(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned i; + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } +#else + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif + + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +#endif +} + +static void emit_tex(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned msg_len; + unsigned i, nr; + unsigned emit; + boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } +#else + emit = WRITEMASK_XY; + nr = 2; +#endif + + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const unsigned swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif +} + + + + + + + + +static void emit_fb_write(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + int base_reg = 0; + + // src0 = output color + // src1 = payload_depth[0] + // src2 = output depth + // dst = ??? 
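  // Rough shape of the message assembled below:
  //   m0            not written by this function (header handling is left to brw_fb_WRITE)
  //   m1            pixel control, copied from r1 with mask control disabled
  //   m2            slot reserved up front only when key->aa_dest_stencil_reg is set
  //   next 8 regs   the four output color channels; only four MOVs are emitted but
  //                 eight message regs are skipped, presumably leaving room for the
  //                 second half of each channel in the compressed case
  // The closing brw_fb_WRITE send always targets render surface 0, with a message
  // length equal to the final value of nr.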
+ + + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); + +} + + +static void brw_wm_emit_instruction( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst ) +{ + struct brw_compile *p = &c->func; + +#if 0 + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + emit_abs(c, inst); + break; + case TGSI_OPCODE_ADD: + emit_alu2(c, inst, BRW_OPCODE_ADD); + break; + case TGSI_OPCODE_SUB: + assert(0); +// emit_alu2(c, inst, BRW_OPCODE_SUB); + break; + case TGSI_OPCODE_FRC: + emit_alu1(c, inst, BRW_OPCODE_FRC); + break; + case TGSI_OPCODE_FLR: + assert(0); +// emit_alu1(c, inst, BRW_OPCODE_FLR); + break; + case TGSI_OPCODE_LRP: + emit_lrp(c, inst); + break; + case TGSI_OPCODE_INT: + emit_alu1(c, inst, BRW_OPCODE_RNDD); + break; + case TGSI_OPCODE_MOV: + emit_alu1(c, inst, BRW_OPCODE_MOV); + break; + case TGSI_OPCODE_DP3: + emit_dp3(c, inst); + break; + case TGSI_OPCODE_DP4: + emit_dp4(c, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(c, inst); + break; + case TGSI_OPCODE_DPH: + emit_dph(c, inst); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); + break; + case TGSI_OPCODE_COS: + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); + break; + case TGSI_OPCODE_MAX: + emit_max(c, inst); + break; + case TGSI_OPCODE_MIN: + emit_min(c, inst); + break; + case TGSI_OPCODE_DDX: + emit_ddx(c, inst); + break; + case TGSI_OPCODE_DDY: + emit_ddy(c, inst); + break; + case TGSI_OPCODE_SLT: + emit_sop(c, inst, BRW_CONDITIONAL_L); + break; + case TGSI_OPCODE_SLE: + emit_sop(c, inst, BRW_CONDITIONAL_LE); + break; + case TGSI_OPCODE_SGT: + emit_sop(c, inst, BRW_CONDITIONAL_G); + break; + case TGSI_OPCODE_SGE: + emit_sop(c, inst, BRW_CONDITIONAL_GE); + break; + case TGSI_OPCODE_SEQ: + emit_sop(c, inst, BRW_CONDITIONAL_EQ); + break; + case TGSI_OPCODE_SNE: + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + break; + case TGSI_OPCODE_MUL: + emit_alu2(c, inst, BRW_OPCODE_MUL); + break; + case TGSI_OPCODE_POW: + emit_pow(c, inst); + break; + case TGSI_OPCODE_MAD: + emit_mad(c, inst); + break; + case TGSI_OPCODE_TEX: + emit_tex(c, 
inst); + break; + case TGSI_OPCODE_TXB: + emit_txb(c, inst); + break; + case TGSI_OPCODE_TEXKILL: + emit_kil(c); + break; + case TGSI_OPCODE_IF: + assert(c->if_insn < MAX_IFSN); + c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_insn > 0); + brw_ENDIF(p, c->if_inst[--c->if_insn]); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + case TGSI_OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); +// orig_inst = inst->Data; +// orig_inst->Data = &p->store[p->nr_insn]; + assert(0); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case TGSI_OPCODE_RET: +#if 0 + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); +#else + emit_fb_write(c, inst); +#endif + + break; + case TGSI_OPCODE_LOOP: + c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + c->loop_insn--; + c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (c->inst0 > c->loop_inst[c->loop_insn]) { + c->inst0--; + if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; + c->inst0->bits3.if_else.pop_count = 0; + } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; + c->inst0->bits3.if_else.pop_count = 0; + } + } + break; + case TGSI_OPCODE_END: + emit_fb_write(c, inst); + break; + + default: + debug_printf("unsupported IR in fragment shader %d\n", + inst->Instruction.Opcode); + } +#if 0 + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif +} + + + + + + +void brw_wm_glsl_emit(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + struct brw_compile *p = &c->func; + + brw_init_compile(&c->func); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + c->reg_index = 0; + c->if_insn = 0; + c->loop_insn = 0; + c->stack_index = brw_indirect(0,0); + + /* Do static register allocation and parameter interpolation: + */ + brw_wm_emit_decls( c ); + + /* Emit the actual program. All done with very direct translation, + * hopefully we can improve on this shortly... 
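 The loop below simply walks the TGSI token stream: declarations were
 already consumed by brw_wm_emit_decls() above, immediate tokens are not
 handled yet (they hit an assert), and each instruction token is dispatched
 straight to brw_wm_emit_instruction() with no scheduling or optimization
 pass in between.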
+ */ + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* already done */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* not handled yet */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); + + /* Fix up call targets: + */ +#if 0 + { + unsigned nr_insns = c->fp->program.Base.NumInstructions; + unsigned insn, target_insn; + struct tgsi_full_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } + } +#endif + + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c new file mode 100644 index 0000000000..6c5f25bf39 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_iz.c @@ -0,0 +1,214 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_wm.h" + + +#undef P /* prompted depth */ +#undef C /* computed */ +#undef N /* non-promoted? 
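 The three one-letter codes defined just below tag each entry of
 wm_iz_table: P for (pro)moted depth, C for a depth value the pixel shader
 computes, and N for the non-promoted case. Each entry, indexed by the IZ_*
 lookup bits, records which depth/stencil fields must be present in the
 thread payload; brw_wm_lookup_iz() then walks the selected entry to hand
 out payload register numbers (source depth, AA/dest stencil, dest depth)
 and to size nr_depth_regs.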
*/ + +#define P 0 +#define C 1 +#define N 2 + +const struct { + unsigned mode:2; + unsigned sd_present:1; + unsigned sd_to_rt:1; + unsigned dd_present:1; + unsigned ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 1, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 0, 1, 0, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 1, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 0, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ) +{ + unsigned reg = 2; + + assert (lookup < IZ_BIT_MAX); + + if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) + key->computes_depth = 1; + + if (wm_iz_table[lookup].sd_present) { + key->source_depth_reg = reg; + reg += 2; + } + + if (wm_iz_table[lookup].sd_to_rt) + key->source_depth_to_render_target = 1; + + if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { + key->aa_dest_stencil_reg = reg; + key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + line_aa == AA_SOMETIMES); + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + 
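      /* dest depth, like source depth, occupies a register pair;
       * nr_depth_regs just below rounds the running count up to whole pairs.
       */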
key->dest_depth_reg = reg; + reg+=2; + } + + key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c new file mode 100644 index 0000000000..52b2909a65 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -0,0 +1,275 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. + */ + +static int intel_translate_shadow_compare_func(unsigned func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_ALWAYS; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LESS; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_NEVER; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. 
+ */ +static unsigned translate_wrap_mode( int wrap ) +{ + switch( wrap ) { + case PIPE_TEX_WRAP_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return BRW_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + + +static unsigned U_FIXED(float value, unsigned frac_bits) +{ + value *= (1<<frac_bits); + return value < 0 ? 0 : value; +} + +static int S_FIXED(float value, unsigned frac_bits) +{ + return value * (1<<frac_bits); +} + + +static unsigned upload_default_color( struct brw_context *brw, + const float *color ) +{ + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); +} + + +/* + */ +static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, + unsigned sdc_gs_offset, + struct brw_sampler_state *sampler) +{ + memset(sampler, 0, sizeof(*sampler)); + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + break; + default: + break; + } + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + default: + break; + } + /* Set Anisotropy: + */ + switch (pipe_sampler->mag_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + + if (pipe_sampler->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + + sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); + sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); + sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); + + /* Fulsim complains if I don't do this. Hardware doesn't mind: + */ +#if 0 + if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } +#endif + + /* Set shadow function: + */ + if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. 
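 A quick worked example of the fixed-point helpers used for the LOD fields a
 few lines below: S_FIXED(v, 6) and U_FIXED(v, 6) just scale by 2^6 = 64 and
 truncate, so a lod_bias of -1.5 encodes as -96 and a max_lod of 4.25 encodes
 as 272; values outside the clamp ranges ([-16, 15] for the bias, [0, 13] for
 min/max LOD) are clamped before encoding.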
+ */ + sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); + } + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set BaseMipLevel, MaxLOD, MinLOD: + * + * XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); + + sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; +} + + + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things. However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. + */ +static void upload_wm_samplers(struct brw_context *brw) +{ + unsigned unit; + unsigned sampler_count = 0; + + /* BRW_NEW_SAMPLER */ + for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + if (brw->attribs.Texture[unit]) { + const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; + unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); + + brw_update_sampler_state(sampler, + sdc_gs_offset, + &brw->wm.sampler[unit]); + + sampler_count = unit + 1; + } + } + + if (brw->wm.sampler_count != sampler_count) { + brw->wm.sampler_count = sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + brw->wm.sampler_gs_offset = 0; + + if (brw->wm.sampler_count) + brw->wm.sampler_gs_offset = + brw_cache_data_sz(&brw->cache[BRW_SAMPLER], + brw->wm.sampler, + sizeof(struct brw_sampler_state) * brw->wm.sampler_count); +} + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .brw = BRW_NEW_SAMPLER, + .cache = 0 + }, + .update = upload_wm_samplers +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c new file mode 100644 index 0000000000..37a9bf919c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -0,0 +1,195 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ +static void upload_wm_unit(struct brw_context *brw ) +{ + struct brw_wm_unit_state wm; + unsigned max_threads; + unsigned per_thread; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + max_threads = 0; + else + max_threads = 31; + + + memset(&wm, 0, sizeof(wm)); + + /* CACHE_NEW_WM_PROG */ + wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; + wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; + wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; + + wm.wm5.max_threads = max_threads; + + per_thread = align(brw->wm.prog_data->total_scratch, 1024); + assert(per_thread <= 12 * 1024); + +#if 0 + if (brw->wm.prog_data->total_scratch) { + unsigned total = per_thread * (max_threads + 1); + + /* Scratch space -- just have to make sure there is sufficient + * allocated for the active program and current number of threads. + */ + brw->wm.scratch_buffer_size = total; + if (brw->wm.scratch_buffer && + brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; + } + if (!brw->wm.scratch_buffer) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, + "wm scratch", + brw->wm.scratch_buffer_size, + 4096, DRM_BO_FLAG_MEM_TT); + } + } + /* XXX: Scratch buffers are not implemented correectly. + * + * The scratch offset to be programmed into wm is relative to the general + * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or + * the previous bmGenBuffers scheme), we get an offset relative to the + * start of framebuffer. Even before then, it was broken in other ways, + * so just fail for now if we hit that path. 
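 For scale: with the sizing code above, a program needing 3000 bytes of
 scratch rounds up to per_thread = 3072, and with max_threads = 31 the
 buffer would have to be 3072 * 32 = 98304 bytes; per-thread scratch is
 capped at 12KB by the assert further up.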
+ */ + assert(brw->wm.prog_data->total_scratch == 0); +#endif + + /* CACHE_NEW_SURFACE */ + wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + + /* BRW_NEW_CURBE_OFFSETS */ + wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + + wm.thread3.urb_entry_read_offset = 0; + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + /* CACHE_NEW_SAMPLER */ + wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; + wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + { + const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; + + if (fp->UsesDepth) + wm.wm5.program_uses_depth = 1; /* as far as we can tell */ + + if (fp->info.writes_z) + wm.wm5.program_computes_depth = 1; + + /* BRW_NEW_ALPHA_TEST */ + if (fp->info.uses_kill || + brw->attribs.DepthStencil->alpha.enabled) + wm.wm5.program_uses_killpixel = 1; + + wm.wm5.enable_8_pix = 1; + } + + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + /* BRW_NEW_RASTERIZER */ + if (brw->attribs.Raster->poly_stipple_enable) + wm.wm5.polygon_stipple = 1; + +#if 0 + if (brw->attribs.Polygon->OffsetFill) { + wm.wm5.depth_offset = 1; + /* Something wierd going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + } +#endif + + if (brw->attribs.Raster->line_stipple_enable) { + wm.wm5.line_stipple = 1; + } + + if (BRW_DEBUG & DEBUG_STATS) + wm.wm4.stats_enable = 1; + + brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + + if (brw->wm.prog_data->total_scratch) { + /* + dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + (per_thread / 1024) - 1, + brw->wm.state_gs_offset + + ((char *)&wm.thread2 - (char *)&wm), + brw->wm.scratch_buffer); + */ + } else { + wm.thread2.scratch_space_base_pointer = 0; + } +} + +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_ALPHA_TEST | + BRW_NEW_FS | + BRW_NEW_CURBE_OFFSETS), + + .cache = (CACHE_NEW_SURFACE | + CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .update = upload_wm_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c new file mode 100644 index 0000000000..1a326f9918 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -0,0 +1,304 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static unsigned translate_tex_target( enum pipe_texture_target target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return 0; + } +} + +static unsigned translate_tex_format( enum pipe_format pipe_format ) +{ + switch( pipe_format ) { + case PIPE_FORMAT_L8_UNORM: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_I8_UNORM: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_A8_UNORM: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_A8L8_UNORM: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_R8G8B8_UNORM: + assert(0); /* not supported for sampling */ + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return BRW_SURFACEFORMAT_FXT1; +#endif + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; +#if 0 + case PIPE_FORMAT_RGB_DXT1: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case PIPE_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case PIPE_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case PIPE_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; +#endif + + default: + assert(0); + return 0; + } +} + +static unsigned brw_buffer_offset(struct brw_context *brw, + struct pipe_buffer *buffer) +{ + return brw->winsys->get_buffer_offset(brw->winsys, + buffer, + 0); +} + +static 
+void brw_update_texture_surface( struct brw_context *brw, + unsigned unit ) +{ + const struct brw_texture *tObj = brw->attribs.Texture[unit]; + struct brw_surface_state surf; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(tObj->base.target); + surf.ss0.surface_format = translate_tex_format(tObj->base.format); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + /* Updated in emit_reloc */ + surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); + + surf.ss2.mip_count = tObj->base.last_level; + surf.ss2.width = tObj->base.width[0] - 1; + surf.ss2.height = tObj->base.height[0] - 1; + + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; /* always zero */ + surf.ss3.pitch = tObj->stride - 1; + surf.ss3.depth = tObj->base.depth[0] - 1; + + surf.ss4.min_lod = 0; + + if (tObj->base.target == PIPE_TEXTURE_CUBE) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; + } + + brw->wm.bind.surf_ss_offset[unit + 1] = + brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); +} + + + +#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) + + +static void upload_wm_surfaces(struct brw_context *brw ) +{ + unsigned i; + + { + struct brw_surface_state surf; + + /* BRW_NEW_FRAMEBUFFER + */ + struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + + memset(&surf, 0, sizeof(surf)); + + if (pipe_surface != NULL) { + if (pipe_surface->block.size == 4) + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + + surf.ss0.surface_type = BRW_SURFACE_2D; + + surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + + surf.ss2.width = pipe_surface->width - 1; + surf.ss2.height = pipe_surface->height - 1; + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; + surf.ss3.pitch = pipe_surface->stride - 1; + } else { + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + surf.ss0.surface_type = BRW_SURFACE_NULL; + } + + /* BRW_NEW_BLEND */ + surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && + brw->attribs.Blend->blend_enable); + + + surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R); + surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); + surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B); + surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); + + + + + brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + + brw->wm.nr_surfaces = 1; + } + + + /* BRW_NEW_TEXTURE + */ + for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { + const struct brw_texture *texUnit = brw->attribs.Texture[i]; + + if (texUnit && + texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) { + + brw_update_texture_surface(brw, i); + + brw->wm.nr_surfaces = i+2; + } + else { + brw->wm.bind.surf_ss_offset[i+1] = 0; + } + } + + brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], + &brw->wm.bind ); +} + + +/* KW: Will find a different way to acheive this, see for example the + * state caches with relocs in the i915 swz driver. 
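 The disabled code below shows the intended shape: one relocation pointing
 the color buffer's surface state (its ss1 base-address dword) at the draw
 region's buffer, plus one per enabled texture unit pointing that unit's
 surface state at the miptree's buffer. Until that lands, ss1.base_addr is
 simply filled in from brw_buffer_offset() when the surface state is built,
 as done above.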
+ */ +#if 0 +static void emit_reloc_wm_surfaces(struct brw_context *brw) +{ + int unit; + + if (brw->state.draw_region != NULL) { + /* Emit framebuffer relocation */ + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, + brw->wm.bind.surf_ss_offset[0] + + offsetof(struct brw_surface_state, ss1), + brw->state.draw_region->buffer); + } + + /* Emit relocations for texture buffers */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + brw->wm.bind.surf_ss_offset[unit + 1] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer); + } + } +} +#endif + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .brw = (BRW_NEW_FRAMEBUFFER | + BRW_NEW_BLEND | + BRW_NEW_TEXTURE), + .cache = 0 + }, + .update = upload_wm_surfaces, +}; diff --git a/src/gallium/drivers/nouveau/nouveau_bo.h b/src/gallium/drivers/nouveau/nouveau_bo.h new file mode 100644 index 0000000000..65b138283c --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_bo.h @@ -0,0 +1,53 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __NOUVEAU_BO_H__ +#define __NOUVEAU_BO_H__ + +/* Relocation/Buffer type flags */ +#define NOUVEAU_BO_VRAM (1 << 0) +#define NOUVEAU_BO_GART (1 << 1) +#define NOUVEAU_BO_RD (1 << 2) +#define NOUVEAU_BO_WR (1 << 3) +#define NOUVEAU_BO_RDWR (NOUVEAU_BO_RD | NOUVEAU_BO_WR) +#define NOUVEAU_BO_MAP (1 << 4) +#define NOUVEAU_BO_PIN (1 << 5) +#define NOUVEAU_BO_LOW (1 << 6) +#define NOUVEAU_BO_HIGH (1 << 7) +#define NOUVEAU_BO_OR (1 << 8) +#define NOUVEAU_BO_LOCAL (1 << 9) +#define NOUVEAU_BO_TILED (1 << 10) +#define NOUVEAU_BO_ZTILE (1 << 11) +#define NOUVEAU_BO_DUMMY (1 << 31) + +struct nouveau_bo { + struct nouveau_device *device; + uint64_t handle; + + uint64_t size; + void *map; + + uint32_t flags; + uint64_t offset; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_channel.h b/src/gallium/drivers/nouveau/nouveau_channel.h new file mode 100644 index 0000000000..cd99a676bd --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_channel.h @@ -0,0 +1,40 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_CHANNEL_H__ +#define __NOUVEAU_CHANNEL_H__ + +struct nouveau_channel { + struct nouveau_device *device; + int id; + + struct nouveau_pushbuf *pushbuf; + + struct nouveau_grobj *nullobj; + struct nouveau_grobj *vram; + struct nouveau_grobj *gart; + + void *user_private; + void (*hang_notify)(struct nouveau_channel *); +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_class.h b/src/gallium/drivers/nouveau/nouveau_class.h new file mode 100644 index 0000000000..3df3d7b2b8 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_class.h @@ -0,0 +1,8006 @@ +/************************************************************************* + + Autogenerated file, do not edit ! + +************************************************************************** + + Copyright (C) 2006-2008 : + Dmitry Baryshkov, + Laurent Carlier, + Matthieu Castet, + Dawid Gajownik, + Jeremy Kolb, + Stephane Loeuillet, + Patrice Mandin, + Stephane Marchesin, + Serge Martin, + Sylvain Munaut, + Simon Raffeiner, + Ben Skeggs, + Erik Waling, + koala_br, + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +*************************************************************************/ + + +#ifndef NOUVEAU_REG_H +#define NOUVEAU_REG_H 1 + + +#define NV01_ROOT 0x00000001 + + + +#define NV01_CONTEXT_DMA 0x00000002 + + + +#define NV01_DEVICE 0x00000003 + + + +#define NV01_TIMER 0x00000004 + +#define NV01_TIMER_SYNCHRONIZE 0x00000100 +#define NV01_TIMER_STOP_ALARM 0x00000104 +#define NV01_TIMER_DMA_NOTIFY 0x00000180 +#define NV01_TIMER_TIME(x) (0x00000300+((x)*4)) +#define NV01_TIMER_TIME__SIZE 0x00000002 +#define NV01_TIMER_ALARM_NOTIFY 0x00000308 + + +#define NV_IMAGE_STENCIL 0x00000010 + +#define NV_IMAGE_STENCIL_NOTIFY 0x00000104 +#define NV_IMAGE_STENCIL_DMA_NOTIFY 0x00000180 +#define NV_IMAGE_STENCIL_IMAGE_OUTPUT 0x00000200 +#define NV_IMAGE_STENCIL_IMAGE_INPUT(x) (0x00000204+((x)*4)) +#define NV_IMAGE_STENCIL_IMAGE_INPUT__SIZE 0x00000002 + + +#define NV_IMAGE_BLEND_AND 0x00000011 + +#define NV_IMAGE_BLEND_AND_NOP 0x00000100 +#define NV_IMAGE_BLEND_AND_NOTIFY 0x00000104 +#define NV_IMAGE_BLEND_AND_DMA_NOTIFY 0x00000180 +#define NV_IMAGE_BLEND_AND_IMAGE_OUTPUT 0x00000200 +#define NV_IMAGE_BLEND_AND_BETA_INPUT 0x00000204 +#define NV_IMAGE_BLEND_AND_IMAGE_INPUT 0x00000208 + + +#define NV01_CONTEXT_BETA1 0x00000012 + +#define NV01_CONTEXT_BETA1_NOP 0x00000100 +#define NV01_CONTEXT_BETA1_NOTIFY 0x00000104 +#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300 + + +#define NV_IMAGE_ROP_AND 0x00000013 + +#define NV_IMAGE_ROP_AND_NOTIFY 0x00000104 +#define NV_IMAGE_ROP_AND_DMA_NOTIFY 0x00000180 +#define NV_IMAGE_ROP_AND_IMAGE_OUTPUT 0x00000200 +#define NV_IMAGE_ROP_AND_ROP_INPUT 0x00000204 +#define NV_IMAGE_ROP_AND_IMAGE_INPUT(x) (0x00000208+((x)*4)) +#define NV_IMAGE_ROP_AND_IMAGE_INPUT__SIZE 0x00000002 + + +#define NV_IMAGE_COLOR_KEY 0x00000015 + + + +#define NV01_CONTEXT_COLOR_KEY 0x00000017 + +#define NV01_CONTEXT_COLOR_KEY_NOP 0x00000100 +#define NV01_CONTEXT_COLOR_KEY_NOTIFY 0x00000104 +#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A8Y8 0x00000001 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X24Y8 0x00000002 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000003 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X17R5G5B5 0x00000004 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000005 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X8R8G8B8 0x00000006 +#define 
NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16Y16 0x00000007 +#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16Y16 0x00000008 +#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304 + + +#define NV01_CONTEXT_PATTERN 0x00000018 + +#define NV01_CONTEXT_PATTERN_NOP 0x00000100 +#define NV01_CONTEXT_PATTERN_NOTIFY 0x00000104 +#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300 +#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304 +#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308 +#define NV01_CONTEXT_PATTERN_COLOR(x) (0x00000310+((x)*4)) +#define NV01_CONTEXT_PATTERN_COLOR__SIZE 0x00000002 +#define NV01_CONTEXT_PATTERN_PATTERN(x) (0x00000318+((x)*4)) +#define NV01_CONTEXT_PATTERN_PATTERN__SIZE 0x00000002 + + +#define NV01_CONTEXT_CLIP_RECTANGLE 0x00000019 + +#define NV01_CONTEXT_CLIP_RECTANGLE_NOP 0x00000100 +#define NV01_CONTEXT_CLIP_RECTANGLE_NOTIFY 0x00000104 +#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_SHIFT 0 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X_MASK 0x0000ffff +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_SHIFT 16 +#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y_MASK 0xffff0000 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_SHIFT 0 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W_MASK 0x0000ffff +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_SHIFT 16 +#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H_MASK 0xffff0000 + + +#define NV01_RENDER_SOLID_LINE 0x0000001c + +#define NV01_RENDER_SOLID_LINE_NOP 0x00000100 +#define NV01_RENDER_SOLID_LINE_NOTIFY 0x00000104 +#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c +#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180 +#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184 +#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188 +#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c +#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190 +#define NV01_RENDER_SOLID_LINE_SURFACE 0x00000194 +#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A8Y8 0x00000001 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X24Y8 0x00000002 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000003 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X17R5G5B5 0x00000004 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000005 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X8R8G8B8 0x00000006 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16Y16 0x00000007 +#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16Y16 0x00000008 +#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0(x) (0x00000400+((x)*8)) +#define NV01_RENDER_SOLID_LINE_LINE_POINT0__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1(x) (0x00000404+((x)*8)) +#define 
NV01_RENDER_SOLID_LINE_LINE_POINT1__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(x) (0x00000480+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(x) (0x00000484+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(x) (0x00000488+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(x) (0x0000048c+((x)*16)) +#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_POLYLINE(x) (0x00000500+((x)*4)) +#define NV01_RENDER_SOLID_LINE_POLYLINE__SIZE 0x00000020 +#define NV01_RENDER_SOLID_LINE_POLYLINE_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_POLYLINE_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_POLYLINE_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(x) (0x00000580+((x)*8)) +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(x) (0x00000584+((x)*8)) +#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(x) (0x00000600+((x)*8)) +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(x) (0x00000604+((x)*8)) +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__SIZE 0x00000010 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_SHIFT 0 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_SHIFT 16 +#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y_MASK 0xffff0000 + + +#define NV01_RENDER_SOLID_TRIANGLE 0x0000001d + +#define NV01_RENDER_SOLID_TRIANGLE_NOP 0x00000100 +#define NV01_RENDER_SOLID_TRIANGLE_NOTIFY 0x00000104 +#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c +#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184 +#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188 +#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c +#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190 +#define NV01_RENDER_SOLID_TRIANGLE_SURFACE 0x00000194 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314 +#define 
NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330 +#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(x) (0x00000400+((x)*4)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__SIZE 0x00000020 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(x) (0x00000480+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(x) (0x00000484+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(x) (0x00000500+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(x) (0x00000504+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(x) (0x00000508+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(x) (0x0000050c+((x)*16)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__SIZE 0x00000008 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_SHIFT 16 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(x) (0x00000580+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(x) (0x00000584+((x)*8)) +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__SIZE 0x00000010 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_SHIFT 0 +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_SHIFT 16 +#define 
NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y_MASK 0xffff0000 + + +#define NV01_RENDER_SOLID_RECTANGLE 0x0000001e + +#define NV01_RENDER_SOLID_RECTANGLE_NOP 0x00000100 +#define NV01_RENDER_SOLID_RECTANGLE_NOTIFY 0x00000104 +#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c +#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180 +#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184 +#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188 +#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c +#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190 +#define NV01_RENDER_SOLID_RECTANGLE_SURFACE 0x00000194 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300 +#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(x) (0x00000400+((x)*8)) +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__SIZE 0x00000010 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_SHIFT 0 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X_MASK 0x0000ffff +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_SHIFT 16 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y_MASK 0xffff0000 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(x) (0x00000404+((x)*8)) +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__SIZE 0x00000010 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_SHIFT 0 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W_MASK 0x0000ffff +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_SHIFT 16 +#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H_MASK 0xffff0000 + + +#define NV01_IMAGE_BLIT 0x0000001f + +#define NV01_IMAGE_BLIT_NOP 0x00000100 +#define NV01_IMAGE_BLIT_NOTIFY 0x00000104 +#define NV01_IMAGE_BLIT_PATCH 0x0000010c +#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180 +#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184 +#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 +#define NV01_IMAGE_BLIT_PATTERN 0x0000018c +#define NV01_IMAGE_BLIT_ROP 0x00000190 +#define NV01_IMAGE_BLIT_BETA1 0x00000194 +#define NV01_IMAGE_BLIT_SURFACE 0x0000019c +#define NV01_IMAGE_BLIT_OPERATION 0x000002fc +#define NV01_IMAGE_BLIT_IMAGE_INPUT 0x00000204 +#define NV01_IMAGE_BLIT_POINT_IN 0x00000300 +#define NV01_IMAGE_BLIT_POINT_IN_X_SHIFT 0 +#define NV01_IMAGE_BLIT_POINT_IN_X_MASK 0x0000ffff +#define NV01_IMAGE_BLIT_POINT_IN_Y_SHIFT 16 +#define NV01_IMAGE_BLIT_POINT_IN_Y_MASK 0xffff0000 +#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304 +#define NV01_IMAGE_BLIT_POINT_OUT_X_SHIFT 0 +#define NV01_IMAGE_BLIT_POINT_OUT_X_MASK 0x0000ffff +#define NV01_IMAGE_BLIT_POINT_OUT_Y_SHIFT 16 +#define NV01_IMAGE_BLIT_POINT_OUT_Y_MASK 0xffff0000 +#define NV01_IMAGE_BLIT_SIZE 0x00000308 +#define NV01_IMAGE_BLIT_SIZE_W_SHIFT 0 +#define NV01_IMAGE_BLIT_SIZE_W_MASK 0x0000ffff +#define NV01_IMAGE_BLIT_SIZE_H_SHIFT 16 +#define NV01_IMAGE_BLIT_SIZE_H_MASK 0xffff0000 + + +#define NV01_IMAGE_FROM_CPU 0x00000021 + +#define NV01_IMAGE_FROM_CPU_NOP 0x00000100 +#define NV01_IMAGE_FROM_CPU_NOTIFY 0x00000104 +#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c +#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define 
NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 +#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188 +#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c +#define NV01_IMAGE_FROM_CPU_ROP 0x00000190 +#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194 +#define NV01_IMAGE_FROM_CPU_SURFACE 0x00000198 +#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000 +#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001 +#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002 +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003 +#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_Y8 0x00000001 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004 +#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005 +#define NV01_IMAGE_FROM_CPU_POINT 0x00000304 +#define NV01_IMAGE_FROM_CPU_POINT_X_SHIFT 0 +#define NV01_IMAGE_FROM_CPU_POINT_X_MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_POINT_Y_SHIFT 16 +#define NV01_IMAGE_FROM_CPU_POINT_Y_MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_SHIFT 0 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W_MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_SHIFT 16 +#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H_MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c +#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0 +#define NV01_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff +#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16 +#define NV01_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000 +#define NV01_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) +#define NV01_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020 + + +#define NV01_NULL 0x00000030 + + + +#define NV03_STRETCHED_IMAGE_FROM_CPU 0x00000036 + +#define NV03_STRETCHED_IMAGE_FROM_CPU_NOP 0x00000100 +#define NV03_STRETCHED_IMAGE_FROM_CPU_NOTIFY 0x00000104 +#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c +#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184 +#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188 +#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c +#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000194 +#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308 +#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_SHIFT 0 +#define 
NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_SHIFT 0 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X_MASK 0x0000ffff +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_SHIFT 16 +#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y_MASK 0xffff0000 +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) +#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__SIZE 0x00000020 + + +#define NV03_SCALED_IMAGE_FROM_MEMORY 0x00000037 + +#define NV03_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100 +#define NV03_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 +#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 +#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c +#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 +#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000194 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 +#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT 0x00000310 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_X_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_POINT_Y_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE 0x00000314 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_W_MASK 0x0000ffff +#define 
NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_OUT_SIZE_H_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DU_DX 0x00000318 +#define NV03_SCALED_IMAGE_FROM_MEMORY_DELTA_DV_DY 0x0000031c +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE 0x00000400 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_W_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_SIZE_H_MASK 0xffff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT 0x00000404 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_PITCH_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_MASK 0x00ff0000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CENTER 0x00010000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_ORIGIN_CORNER 0x00020000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_SHIFT 24 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_FORMAT_INTERPOLATOR_MASK 0xff000000 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_OFFSET 0x00000408 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT 0x0000040c +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_SHIFT 0 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_U_MASK 0x0000ffff +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_SHIFT 16 +#define NV03_SCALED_IMAGE_FROM_MEMORY_IMAGE_IN_POINT_V_MASK 0xffff0000 + + +#define NV04_DVD_SUBPICTURE 0x00000038 + +#define NV04_DVD_SUBPICTURE_NOP 0x00000100 +#define NV04_DVD_SUBPICTURE_NOTIFY 0x00000104 +#define NV04_DVD_SUBPICTURE_WAIT_FOR_IDLE 0x00000108 +#define NV04_DVD_SUBPICTURE_DMA_NOTIFY 0x00000180 +#define NV04_DVD_SUBPICTURE_DMA_OVERLAY 0x00000184 +#define NV04_DVD_SUBPICTURE_DMA_IMAGEIN 0x00000188 +#define NV04_DVD_SUBPICTURE_DMA_IMAGEOUT 0x0000018c +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT 0x00000300 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_X_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_POINT_Y_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE 0x00000304 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_W_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_SIZE_H_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT 0x00000308 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_FORMAT_COLOR_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEOUT_OFFSET 0x0000030c +#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DU_DX 0x00000310 +#define NV04_DVD_SUBPICTURE_IMAGEIN_DELTA_DV_DY 0x00000314 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE 0x00000318 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_W_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEIN_SIZE_H_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT 0x0000031c +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_SHIFT 0 +#define 
NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEIN_FORMAT_COLOR_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_IMAGEIN_OFFSET 0x00000320 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT 0x00000324 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_SHIFT 0 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_U_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_SHIFT 16 +#define NV04_DVD_SUBPICTURE_IMAGEIN_POINT_V_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DU_DX 0x00000328 +#define NV04_DVD_SUBPICTURE_OVERLAY_DELTA_DV_DY 0x0000032c +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE 0x00000330 +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_SHIFT 0 +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_W_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_SHIFT 16 +#define NV04_DVD_SUBPICTURE_OVERLAY_SIZE_H_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT 0x00000334 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_SHIFT 0 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_SHIFT 16 +#define NV04_DVD_SUBPICTURE_OVERLAY_FORMAT_COLOR_MASK 0xffff0000 +#define NV04_DVD_SUBPICTURE_OVERLAY_OFFSET 0x00000338 +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT 0x0000033c +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_SHIFT 0 +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_U_MASK 0x0000ffff +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_SHIFT 16 +#define NV04_DVD_SUBPICTURE_OVERLAY_POINT_V_MASK 0xffff0000 + + +#define NV04_MEMORY_TO_MEMORY_FORMAT 0x00000039 + +#define NV04_MEMORY_TO_MEMORY_FORMAT_NOP 0x00000100 +#define NV04_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104 +#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY 0x00000180 +#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN 0x00000184 +#define NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT 0x00000188 +#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c +#define NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT 0x00000310 +#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN 0x00000314 +#define NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT 0x00000318 +#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN 0x0000031c +#define NV04_MEMORY_TO_MEMORY_FORMAT_LINE_COUNT 0x00000320 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT 0x00000324 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_SHIFT 0 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_INPUT_INC_MASK 0x0000000f +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_SHIFT 8 +#define NV04_MEMORY_TO_MEMORY_FORMAT_FORMAT_OUTPUT_INC_MASK 0x00000f00 +#define NV04_MEMORY_TO_MEMORY_FORMAT_BUF_NOTIFY 0x00000328 + + +#define NV01_MEMORY_LOCAL_BANKED 0x0000003d + + + +#define NV01_MAPPING_SYSTEM 0x0000003e + + + +#define NV03_MEMORY_LOCAL_CURSOR 0x0000003f + + + +#define NV01_MEMORY_LOCAL_LINEAR 0x00000040 + + + +#define NV01_MAPPING_LOCAL 0x00000041 + + + +#define NV04_CONTEXT_SURFACES_2D 0x00000042 + +#define NV04_CONTEXT_SURFACES_2D_NOP 0x00000100 +#define NV04_CONTEXT_SURFACES_2D_NOTIFY 0x00000104 +#define NV04_CONTEXT_SURFACES_2D_PM_TRIGGER 0x00000140 +#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 +#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184 +#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188 +#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003 +#define 
NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a +#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b +#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304 +#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0 +#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16 +#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 +#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c + + +#define NV03_CONTEXT_ROP 0x00000043 + +#define NV03_CONTEXT_ROP_NOP 0x00000100 +#define NV03_CONTEXT_ROP_NOTIFY 0x00000104 +#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180 +#define NV03_CONTEXT_ROP_ROP 0x00000300 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SHIFT 0 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_MASK 0x0000000f +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_CLEAR 0x00000000 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOR 0x00000001 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_INVERTED 0x00000002 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY_INVERTED 0x00000003 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND_REVERSE 0x00000004 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_INVERT 0x00000005 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_XOR 0x00000006 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NAND 0x00000007 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_AND 0x00000008 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_EQUI 0x00000009 +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_NOOP 0x0000000a +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_INVERTED 0x0000000b +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_COPY 0x0000000c +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR_REVERSE 0x0000000d +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_OR 0x0000000e +#define NV03_CONTEXT_ROP_ROP_DST_LOGIC_OP_SET 0x0000000f +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SHIFT 4 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_MASK 0x000000f0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_CLEAR 0x00000000 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOR 0x00000010 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_INVERTED 0x00000020 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY_INVERTED 0x00000030 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND_REVERSE 0x00000040 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_INVERT 0x00000050 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_XOR 0x00000060 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NAND 0x00000070 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_AND 0x00000080 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_EQUI 0x00000090 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_NOOP 0x000000a0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_INVERTED 0x000000b0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_COPY 0x000000c0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR_REVERSE 0x000000d0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_OR 0x000000e0 +#define NV03_CONTEXT_ROP_ROP_SRC_LOGIC_OP_SET 0x000000f0 + + +#define NV04_IMAGE_PATTERN 0x00000044 + +#define NV04_IMAGE_PATTERN_NOP 0x00000100 +#define NV04_IMAGE_PATTERN_NOTIFY 0x00000104 +#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300 +#define 
NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001 +#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002 +#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c +#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001 +#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002 +#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310 +#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314 +#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318 +#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c +#define NV04_IMAGE_PATTERN_PATTERN_Y8(x) (0x00000400+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_Y8__SIZE 0x00000010 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0_MASK 0x000000ff +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_SHIFT 8 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1_MASK 0x0000ff00 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2_MASK 0x00ff0000 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_SHIFT 24 +#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3_MASK 0xff000000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(x) (0x00000500+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__SIZE 0x00000020 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0_MASK 0x0000001f +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_SHIFT 5 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0_MASK 0x000007e0 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_SHIFT 11 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0_MASK 0x0000f800 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1_MASK 0x001f0000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_SHIFT 21 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1_MASK 0x07e00000 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_SHIFT 27 +#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1_MASK 0xf8000000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(x) (0x00000600+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__SIZE 0x00000020 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0_MASK 0x0000001f +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_SHIFT 5 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0_MASK 0x000003e0 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_SHIFT 10 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0_MASK 0x00007c00 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1_MASK 0x001f0000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_SHIFT 21 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1_MASK 0x03e00000 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_SHIFT 26 +#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1_MASK 0x7c000000 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(x) (0x00000700+((x)*4)) +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__SIZE 0x00000040 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_SHIFT 0 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B_MASK 0x000000ff +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_SHIFT 8 +#define 
NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G_MASK 0x0000ff00 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_SHIFT 16 +#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R_MASK 0x00ff0000 + + +#define NV03_VIDEO_LUT_CURSOR_DAC 0x00000046 + +#define NV03_VIDEO_LUT_CURSOR_DAC_SYNCHRONIZE 0x00000100 +#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_IMAGE 0x00000104 +#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_CURSOR 0x00000108 +#define NV03_VIDEO_LUT_CURSOR_DAC_STOP_DAC 0x0000010c +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_NOTIFY 0x00000180 +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE(x) (0x00000184+((x)*4)) +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_IMAGE__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT(x) (0x0000018c+((x)*4)) +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_LUT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR(x) (0x00000194+((x)*4)) +#define NV03_VIDEO_LUT_CURSOR_DAC_DMA_CURSOR__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_GET 0x000002fc +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET(x) (0x00000300+((x)*8)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_OFFSET__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT(x) (0x00000304+((x)*8)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_PITCH_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_COLOR_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_IMAGE_FORMAT_NOTIFY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET(x) (0x00000340+((x)*12)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_OFFSET__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT(x) (0x00000344+((x)*12)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_X_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_Y_MASK 0xffff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT(x) (0x00000348+((x)*12)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_FORMAT__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A 0x00000358 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_X_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_CURSOR_POINT_OUT_A_Y_MASK 0xffff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE(x) (0x00000380+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_W_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_IMAGE_SIZE_H_MASK 0xffff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC(x) (0x00000384+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_START_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_SHIFT 16 +#define 
NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_WIDTH_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_HSYNC_POLARITY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC(x) (0x00000388+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_START_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_WIDTH_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_VSYNC_POLARITY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE(x) (0x0000038c+((x)*16)) +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE__SIZE 0x00000002 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_SHIFT 0 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_WIDTH_MASK 0x0000ffff +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_SHIFT 16 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_HEIGHT_MASK 0x0fff0000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_SHIFT 28 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_DAC_TOTAL_SIZE_NOTIFY_MASK 0xf0000000 +#define NV03_VIDEO_LUT_CURSOR_DAC_SET_PIXEL_CLOCK 0x000003a0 + + +#define NV03_DX3_TEXTURED_TRIANGLE 0x00000048 + +#define NV03_DX3_TEXTURED_TRIANGLE_NOP 0x00000100 +#define NV03_DX3_TEXTURED_TRIANGLE_NOTIFY 0x00000104 +#define NV03_DX3_TEXTURED_TRIANGLE_PATCH 0x0000010c +#define NV03_DX3_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV03_DX3_TEXTURED_TRIANGLE_DMA_TEXTURE 0x00000184 +#define NV03_DX3_TEXTURED_TRIANGLE_CLIP_RECTANGLE 0x00000188 +#define NV03_DX3_TEXTURED_TRIANGLE_SURFACE 0x0000018c +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_OFFSET 0x00000304 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT 0x00000308 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_MASK_MASK 0x0000ffff +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_KEY_ENABLE_MASK 0x000f0000 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_SHIFT 20 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_COLOR_MASK 0x00f00000 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_SHIFT 24 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MIN_MASK 0x0f000000 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_SHIFT 28 +#define NV03_DX3_TEXTURED_TRIANGLE_TEXTURE_FORMAT_SIZE_MAX_MASK 0xf0000000 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER 0x0000030c +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_X_MASK 0x0000001f +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SPREAD_Y_MASK 0x00001f00 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_FILTER_SIZE_ADJUST_MASK 0x00ff0000 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR 0x00000310 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_B_MASK 0x000000ff +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_G_MASK 0x0000ff00 +#define NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_SHIFT 16 +#define 
NV03_DX3_TEXTURED_TRIANGLE_FOG_COLOR_R_MASK 0x00ff0000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT 0x00000314 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_INTERPOLATOR_MASK 0x0000000f +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_SHIFT 4 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_U_MASK 0x00000030 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_SHIFT 6 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_WRAP_V_MASK 0x000000c0 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SOURCE_COLOR_MASK 0x00000f00 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_SHIFT 12 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_CULLING_MASK 0x00007000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_PERSPECTIVE_ENABLE (1 << 15) +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_FUNC_MASK 0x000f0000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_SHIFT 20 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_Z_WRITE_ENABLE_MASK 0x00f00000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_SHIFT 24 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_COLOR_WRITE_ENABLE_MASK 0x07000000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_SHIFT 27 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_ROP_MASK 0x18000000 +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_BETA (1 << 29) +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_DST_BLEND (1 << 30) +#define NV03_DX3_TEXTURED_TRIANGLE_CONTROL_OUT_SRC_BLEND (1 << 31) +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL 0x00000318 +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_REF_MASK 0x000000ff +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_ALPHA_CONTROL_ALPHA_FUNC_MASK 0xffffff00 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR(x) (0x00001000+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_SHIFT 0 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I0_MASK 0x0000000f +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_SHIFT 4 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I1_MASK 0x000000f0 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_SHIFT 8 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I2_MASK 0x00000f00 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_SHIFT 12 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I3_MASK 0x0000f000 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_SHIFT 16 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I4_MASK 0x000f0000 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_SHIFT 20 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_I5_MASK 0x00f00000 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_SHIFT 24 +#define NV03_DX3_TEXTURED_TRIANGLE_SPECULAR_FOG_MASK 0xff000000 +#define NV03_DX3_TEXTURED_TRIANGLE_COLOR(x) (0x00001004+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_COLOR__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_X(x) (0x00001008+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_X__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_Y(x) (0x0000100c+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_Y__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_Z(x) (0x00001010+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_Z__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_M(x) (0x00001014+((x)*32)) +#define 
NV03_DX3_TEXTURED_TRIANGLE_M__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_U(x) (0x00001018+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_U__SIZE 0x00000040 +#define NV03_DX3_TEXTURED_TRIANGLE_V(x) (0x0000101c+((x)*32)) +#define NV03_DX3_TEXTURED_TRIANGLE_V__SIZE 0x00000040 + + +#define NV04_GDI_RECTANGLE_TEXT 0x0000004a + +#define NV04_GDI_RECTANGLE_TEXT_NOP 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104 +#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c +#define NV04_GDI_RECTANGLE_TEXT_PM_TRIGGER 0x00000140 +#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 +#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184 +#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188 +#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c +#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190 +#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194 +#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(x) (0x00000400+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(x) (0x00000404+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(x) (0x00000600+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__SIZE 
0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(x) (0x00000604+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__SIZE 0x00000020 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000800+((x)*4)) +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000080 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff 
+#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00000c00+((x)*4)) +#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000080 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET_MASK 0x0fffffff +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_SHIFT 28 +#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH_MASK 0xf0000000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(x) (0x00001000+((x)*4)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__SIZE 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX_MASK 0x000000ff +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_SHIFT 8 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X_MASK 0x000fff00 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_SHIFT 20 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y_MASK 0xfff00000 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET_MASK 0x0fffffff +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_SHIFT 28 +#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH_MASK 0xf0000000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(x) (0x00001800+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__SIZE 0x00000100 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_SHIFT 0 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X_MASK 0x0000ffff +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_SHIFT 16 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y_MASK 0xffff0000 +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(x) (0x00001804+((x)*8)) +#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__SIZE 0x00000100 + + +#define NV03_GDI_RECTANGLE_TEXT 0x0000004b + +#define NV03_GDI_RECTANGLE_TEXT_NOP 0x00000100 +#define NV03_GDI_RECTANGLE_TEXT_NOTIFY 0x00000104 +#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180 +#define NV03_GDI_RECTANGLE_TEXT_PATTERN 
0x00000184 +#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188 +#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000018c +#define NV03_GDI_RECTANGLE_TEXT_SURFACE 0x00000190 +#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc +#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X_SHIFT 0 +#define 
NV03_GDI_RECTANGLE_TEXT_POINT_C_X_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(x) (0x00000c00+((x)*4)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__SIZE 0x00000020 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(x) (0x00001000+((x)*4)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__SIZE 0x00000020 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec +#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H_MASK 0xffff0000 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_SHIFT 0 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X_MASK 0x0000ffff +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_SHIFT 16 +#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y_MASK 0xffff0000 +#define 
NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(x) (0x00001400+((x)*4)) +#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__SIZE 0x00000020 + + +#define NV04_SWIZZLED_SURFACE 0x00000052 + +#define NV04_SWIZZLED_SURFACE_NOP 0x00000100 +#define NV04_SWIZZLED_SURFACE_NOTIFY 0x00000104 +#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180 +#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184 +#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_SHIFT 0 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_MASK 0x000000ff +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009 +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a +#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_MASK 0x00ff0000 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_MASK 0xff000000 +#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304 + + +#define NV04_CONTEXT_SURFACES_3D 0x00000053 + +#define NV04_CONTEXT_SURFACES_3D_NOP 0x00000100 +#define NV04_CONTEXT_SURFACES_3D_NOTIFY 0x00000104 +#define NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180 +#define NV04_CONTEXT_SURFACES_3D_DMA_COLOR 0x00000184 +#define NV04_CONTEXT_SURFACES_3D_DMA_ZETA 0x00000188 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL 0x000002f8 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL 0x000002fc +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_FORMAT 0x00000300 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_MASK 0x000000ff +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000001 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000002 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000004 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000005 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000006 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000007 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SHIFT 8 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_MASK 0x0000ff00 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH 0x00000100 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SWIZZLE 0x00000200 
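
Each register here follows the same pattern: a method offset plus *_SHIFT/*_MASK pairs describing how fields are packed into the 32-bit data word. As a minimal sketch of how such a word might be assembled (the helper names nv04_surf_clip_horiz/nv04_surf_clip_vert are hypothetical and not part of this header):

/* Illustrative only: pack the NV04_CONTEXT_SURFACES_3D clip rectangle words
 * from their *_SHIFT/*_MASK definitions above. */
#include <stdint.h>

static inline uint32_t
nv04_surf_clip_horiz(uint16_t x, uint16_t w)
{
   return (((uint32_t)x << NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_SHIFT) &
           NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X_MASK) |
          (((uint32_t)w << NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_SHIFT) &
           NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W_MASK);
}

static inline uint32_t
nv04_surf_clip_vert(uint16_t y, uint16_t h)
{
   return (((uint32_t)y << NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_SHIFT) &
           NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y_MASK) |
          (((uint32_t)h << NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_SHIFT) &
           NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H_MASK);
}

The resulting words would be written to the CLIP_HORIZONTAL (0x2f8) and CLIP_VERTICAL (0x2fc) methods of the bound object; how the write reaches the hardware (push buffer, FIFO helper, etc.) is outside the scope of this header.
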
+#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U_MASK 0x00ff0000 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V_MASK 0xff000000 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE 0x00000304 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_PITCH 0x00000308 +#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_SHIFT 0 +#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR_MASK 0x0000ffff +#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_SHIFT 16 +#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA_MASK 0xffff0000 +#define NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x0000030c +#define NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000310 + + +#define NV04_DX5_TEXTURED_TRIANGLE 0x00000054 + +#define NV04_DX5_TEXTURED_TRIANGLE_NOP 0x00000100 +#define NV04_DX5_TEXTURED_TRIANGLE_NOTIFY 0x00000104 +#define NV04_DX5_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV04_DX5_TEXTURED_TRIANGLE_DMA_A 0x00000184 +#define NV04_DX5_TEXTURED_TRIANGLE_DMA_B 0x00000188 +#define NV04_DX5_TEXTURED_TRIANGLE_SURFACE 0x0000018c +#define NV04_DX5_TEXTURED_TRIANGLE_COLORKEY 0x00000300 +#define NV04_DX5_TEXTURED_TRIANGLE_OFFSET 0x00000304 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT 0x00000308 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_DMA_MASK 0x00000003 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_SHIFT 2 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_MATCH_MASK 0x0000000c +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8 0x00000100 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000 +#define 
NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP 0x05000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPU (1 << 27) +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP 0x50000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FORMAT_WRAPV (1 << 31) +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER 0x0000030c +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15) +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_MASK 0x07000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27) +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_SHIFT 28 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31) +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND 0x00000310 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MASK 0x0000000f +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_SHIFT 4 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE_MASK 
0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_SRC_MASK 0x0f000000 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_SHIFT 28 +#define NV04_DX5_TEXTURED_TRIANGLE_BLEND_DST_MASK 0xf0000000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL 0x00000314 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE (1 << 12) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN (1 << 13) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT 14 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_MASK 0x0000c000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_MASK 0x00300000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE (1 << 22) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE (1 << 23) +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_MASK 0x3f000000 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT 30 +#define NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_MASK 0xc0000000 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR 0x00000318 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR_A_MASK 0xff000000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(x) (0x00000400+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY(x) (0x00000404+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SY__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ(x) (0x00000408+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SZ__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW(x) (0x0000040c+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_RHW__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR(x) (0x00000410+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G_MASK 0x0000ff00 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A_MASK 0xff000000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR(x) (0x00000414+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR__SIZE 0x00000010 +#define 
NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B_MASK 0x000000ff +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G_MASK 0x0000ff00 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R_MASK 0x00ff0000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_SHIFT 24 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG_MASK 0xff000000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU(x) (0x00000418+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TU__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV(x) (0x0000041c+((x)*32)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_TV__SIZE 0x00000010 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(x) (0x00000600+((x)*4)) +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE__SIZE 0x00000040 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_SHIFT 0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_SHIFT 4 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_SHIFT 8 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_SHIFT 12 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_SHIFT 16 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_SHIFT 20 +#define NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 + + +#define NV04_DX6_MULTITEX_TRIANGLE 0x00000055 + +#define NV04_DX6_MULTITEX_TRIANGLE_NOP 0x00000100 +#define NV04_DX6_MULTITEX_TRIANGLE_NOTIFY 0x00000104 +#define NV04_DX6_MULTITEX_TRIANGLE_DMA_NOTIFY 0x00000180 +#define NV04_DX6_MULTITEX_TRIANGLE_DMA_A 0x00000184 +#define NV04_DX6_MULTITEX_TRIANGLE_DMA_B 0x00000188 +#define NV04_DX6_MULTITEX_TRIANGLE_SURFACE 0x0000018c +#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET(x) (0x00000308+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_OFFSET__SIZE 0x00000002 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT(x) (0x00000310+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT__SIZE 0x00000002 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_DMA_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_MASK 0x00000030 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_SHIFT 6 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_MASK 0x000000c0 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_COLOR_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT 12 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS_MASK 0x0000f000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V_MASK 0x00f00000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_MASK 0x07000000 +#define 
NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPU (1 << 27) +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_SHIFT 28 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_MASK 0x70000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FORMAT_WRAPV (1 << 31) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER(x) (0x00000318+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER__SIZE 0x00000002 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y_MASK 0x00007f00 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE (1 << 15) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MINIFY_MASK 0x07000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE (1 << 27) +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_SHIFT 28 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_MAGNIFY_MASK 0x70000000 +#define NV04_DX6_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE (1 << 31) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA 0x00000320 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR 0x00000324 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT2_MASK 0x00fc0000 
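
The register-combiner words for NV04_DX6_MULTITEX_TRIANGLE are built the same way, with per-argument INVERSE/ALPHA bits and ARGUMENT/OPERATION fields OR'd together. A minimal sketch, assuming hypothetical enumerant values for the arguments and operation (the header defines only the field positions, not the enumerants):

/* Illustrative only: assemble a COMBINE_0_ALPHA data word from its fields. */
#include <stdint.h>

static inline uint32_t
nv04_mtex_combine0_alpha(uint32_t arg0, uint32_t arg1, uint32_t op)
{
   uint32_t val = 0;

   /* argument 0 and 1 selectors */
   val |= (arg0 << NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_SHIFT) &
          NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT0_MASK;
   val |= (arg1 << NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_SHIFT) &
          NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_ARGUMENT1_MASK;
   /* combiner operation */
   val |= (op << NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_SHIFT) &
          NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA_OPERATION_MASK;
   return val;
}

The word would then be written to method 0x320 (NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_ALPHA); the COMBINE_0_COLOR, COMBINE_1_ALPHA and COMBINE_1_COLOR words below follow the identical layout.
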
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_0_COLOR_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA 0x0000032c +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_ALPHA_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR 0x00000330 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE0 (1 << 0) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA0 (1 << 1) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_SHIFT 2 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT0_MASK 0x000000fc +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE1 (1 << 8) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA1 (1 << 9) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_SHIFT 10 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT1_MASK 0x0000fc00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE2 (1 << 16) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA2 (1 << 17) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_SHIFT 18 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT2_MASK 0x00fc0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_INVERSE3 (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ALPHA3 (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_SHIFT 26 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_ARGUMENT3_MASK 0x1c000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_SHIFT 29 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_1_COLOR_OPERATION_MASK 0xe0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR 0x00000334 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_SHIFT 16 
+#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_COMBINE_FACTOR_A_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND 0x00000338 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_MASK_BIT_MASK 0x00000030 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_SHIFT 6 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_MASK 0x000000c0 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_SHIFT 12 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE_MASK 0x0000f000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_ALPHA_ENABLE_MASK 0x00f00000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_SRC_MASK 0x0f000000 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_SHIFT 28 +#define NV04_DX6_MULTITEX_TRIANGLE_BLEND_DST_MASK 0xf0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0 0x0000033c +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_TEST_ENABLE (1 << 12) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ORIGIN (1 << 13) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_SHIFT 14 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE_MASK 0x0000c000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_MASK 0x00300000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE (1 << 22) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_PERSPECTIVE_ENABLE (1 << 23) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE_ENABLE (1 << 24) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE_ENABLE (1 << 25) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE_ENABLE (1 << 26) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE_ENABLE (1 << 27) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE_ENABLE (1 << 28) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE_ENABLE (1 << 29) +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_SHIFT 30 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_MASK 0xc0000000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1 0x00000340 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_TEST_ENABLE_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC_MASK 0x000000f0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ_MASK 0x00ff0000 +#define 
NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2 0x00000344 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL_MASK 0x000000f0 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR 0x00000348 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_FOGCOLOR_A_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX(x) (0x00000400+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SX__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY(x) (0x00000404+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SY__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ(x) (0x00000408+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SZ__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW(x) (0x0000040c+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_RHW__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR(x) (0x00000410+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR(x) (0x00000414+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B_MASK 0x000000ff +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G_MASK 0x0000ff00 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R_MASK 0x00ff0000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_SHIFT 24 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG_MASK 0xff000000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0(x) (0x00000418+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU0__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0(x) (0x0000041c+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV0__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1(x) (0x00000420+((x)*40)) +#define 
NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TU1__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1(x) (0x00000424+((x)*40)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_TV1__SIZE 0x00000008 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE(x) (0x00000540+((x)*4)) +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE__SIZE 0x00000030 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_SHIFT 0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I0_MASK 0x0000000f +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_SHIFT 4 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I1_MASK 0x000000f0 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_SHIFT 8 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I2_MASK 0x00000f00 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_SHIFT 12 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I3_MASK 0x0000f000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_SHIFT 16 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I4_MASK 0x000f0000 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_SHIFT 20 +#define NV04_DX6_MULTITEX_TRIANGLE_TLMTVERTEX_DRAWPRIMITIVE_I5_MASK 0x00f00000 + + +#define NV10_DX5_TEXTURED_TRIANGLE 0x00000094 + + + +#define NV10TCL 0x00000056 + +#define NV10TCL_NOP 0x00000100 +#define NV10TCL_NOTIFY 0x00000104 +#define NV10TCL_DMA_NOTIFY 0x00000180 +#define NV10TCL_DMA_IN_MEMORY0 0x00000184 +#define NV10TCL_DMA_IN_MEMORY1 0x00000188 +#define NV10TCL_DMA_VTXBUF0 0x0000018c +#define NV10TCL_DMA_IN_MEMORY2 0x00000194 +#define NV10TCL_DMA_IN_MEMORY3 0x00000198 +#define NV10TCL_RT_HORIZ 0x00000200 +#define NV10TCL_RT_HORIZ_X_SHIFT 0 +#define NV10TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV10TCL_RT_HORIZ_W_SHIFT 16 +#define NV10TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV10TCL_RT_VERT 0x00000204 +#define NV10TCL_RT_VERT_Y_SHIFT 0 +#define NV10TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV10TCL_RT_VERT_H_SHIFT 16 +#define NV10TCL_RT_VERT_H_MASK 0xffff0000 +#define NV10TCL_RT_FORMAT 0x00000208 +#define NV10TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV10TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV10TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV10TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV10TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV10TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV10TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV10TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV10TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV10TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV10TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d +#define NV10TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV10TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV10TCL_RT_PITCH 0x0000020c +#define NV10TCL_RT_PITCH_COLOR_PITCH_SHIFT 0 +#define NV10TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff +#define NV10TCL_RT_PITCH_ZETA_PITCH_SHIFT 16 +#define NV10TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000 +#define NV10TCL_COLOR_OFFSET 0x00000210 +#define NV10TCL_ZETA_OFFSET 0x00000214 +#define NV10TCL_TX_OFFSET(x) (0x00000218+((x)*4)) +#define NV10TCL_TX_OFFSET__SIZE 0x00000002 +#define NV10TCL_TX_FORMAT(x) (0x00000220+((x)*4)) +#define NV10TCL_TX_FORMAT__SIZE 0x00000002 +#define NV10TCL_TX_FORMAT_DMA0 (1 << 0) +#define NV10TCL_TX_FORMAT_DMA1 (1 << 1) +#define NV10TCL_TX_FORMAT_CUBE_MAP (1 << 2) +#define NV10TCL_TX_FORMAT_FORMAT_SHIFT 7 +#define NV10TCL_TX_FORMAT_FORMAT_MASK 0x00000780 +#define NV10TCL_TX_FORMAT_FORMAT_L8 0x00000000 +#define 
NV10TCL_TX_FORMAT_FORMAT_A8 0x00000080 +#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000100 +#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000180 +#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000200 +#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000280 +#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000300 +#define NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000380 +#define NV10TCL_TX_FORMAT_FORMAT_INDEX8 0x00000580 +#define NV10TCL_TX_FORMAT_FORMAT_DXT1 0x00000600 +#define NV10TCL_TX_FORMAT_FORMAT_DXT3 0x00000700 +#define NV10TCL_TX_FORMAT_FORMAT_DXT5 0x00000780 +#define NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800 +#define NV10TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00000880 +#define NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900 +#define NV10TCL_TX_FORMAT_FORMAT_L8_RECT 0x00000980 +#define NV10TCL_TX_FORMAT_FORMAT_A8L8 0x00000d00 +#define NV10TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00000d80 +#define NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00000e80 +#define NV10TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00000f00 +#define NV10TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00001000 +#define NV10TCL_TX_FORMAT_FORMAT_DSDT 0x00001400 +#define NV10TCL_TX_FORMAT_FORMAT_A16 0x00001900 +#define NV10TCL_TX_FORMAT_FORMAT_HILO16 0x00001980 +#define NV10TCL_TX_FORMAT_FORMAT_A16_RECT 0x00001a80 +#define NV10TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00001b00 +#define NV10TCL_TX_FORMAT_FORMAT_HILO8 0x00002200 +#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00002280 +#define NV10TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00002300 +#define NV10TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00002380 +#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00002500 +#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00002580 +#define NV10TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00002600 +#define NV10TCL_TX_FORMAT_NPOT (1 << 11) +#define NV10TCL_TX_FORMAT_MIPMAP (1 << 15) +#define NV10TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 16 +#define NV10TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x000f0000 +#define NV10TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 20 +#define NV10TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x00f00000 +#define NV10TCL_TX_FORMAT_WRAP_S_SHIFT 24 +#define NV10TCL_TX_FORMAT_WRAP_S_MASK 0x0f000000 +#define NV10TCL_TX_FORMAT_WRAP_S_REPEAT 0x01000000 +#define NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT 0x02000000 +#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE 0x03000000 +#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER 0x04000000 +#define NV10TCL_TX_FORMAT_WRAP_S_CLAMP 0x05000000 +#define NV10TCL_TX_FORMAT_WRAP_T_SHIFT 28 +#define NV10TCL_TX_FORMAT_WRAP_T_MASK 0xf0000000 +#define NV10TCL_TX_FORMAT_WRAP_T_REPEAT 0x10000000 +#define NV10TCL_TX_FORMAT_WRAP_T_MIRRORED_REPEAT 0x20000000 +#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE 0x30000000 +#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_BORDER 0x40000000 +#define NV10TCL_TX_FORMAT_WRAP_T_CLAMP 0x50000000 +#define NV10TCL_TX_ENABLE(x) (0x00000228+((x)*4)) +#define NV10TCL_TX_ENABLE__SIZE 0x00000002 +#define NV10TCL_TX_ENABLE_ANISOTROPY_SHIFT 4 +#define NV10TCL_TX_ENABLE_ANISOTROPY_MASK 0x00000030 +#define NV10TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14 +#define NV10TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000 +#define NV10TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26 +#define NV10TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000 +#define NV10TCL_TX_ENABLE_ENABLE (1 << 30) +#define NV10TCL_TX_NPOT_PITCH(x) (0x00000230+((x)*4)) +#define NV10TCL_TX_NPOT_PITCH__SIZE 0x00000002 +#define NV10TCL_TX_NPOT_PITCH_PITCH_SHIFT 16 +#define NV10TCL_TX_NPOT_PITCH_PITCH_MASK 0xffff0000 +#define NV10TCL_TX_NPOT_SIZE(x) (0x00000240+((x)*4)) +#define NV10TCL_TX_NPOT_SIZE__SIZE 0x00000002 +#define 
NV10TCL_TX_NPOT_SIZE_H_SHIFT 0 +#define NV10TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff +#define NV10TCL_TX_NPOT_SIZE_W_SHIFT 16 +#define NV10TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 +#define NV10TCL_TX_FILTER(x) (0x00000248+((x)*4)) +#define NV10TCL_TX_FILTER__SIZE 0x00000002 +#define NV10TCL_TX_FILTER_LOD_BIAS_SHIFT 8 +#define NV10TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00 +#define NV10TCL_TX_FILTER_MINIFY_SHIFT 24 +#define NV10TCL_TX_FILTER_MINIFY_MASK 0x0f000000 +#define NV10TCL_TX_FILTER_MINIFY_NEAREST 0x01000000 +#define NV10TCL_TX_FILTER_MINIFY_LINEAR 0x02000000 +#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000 +#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000 +#define NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000 +#define NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000 +#define NV10TCL_TX_FILTER_MAGNIFY_SHIFT 28 +#define NV10TCL_TX_FILTER_MAGNIFY_MASK 0xf0000000 +#define NV10TCL_TX_FILTER_MAGNIFY_NEAREST 0x10000000 +#define NV10TCL_TX_FILTER_MAGNIFY_LINEAR 0x20000000 +#define NV10TCL_TX_PALETTE_OFFSET(x) (0x00000250+((x)*4)) +#define NV10TCL_TX_PALETTE_OFFSET__SIZE 0x00000002 +#define NV10TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4)) +#define NV10TCL_RC_IN_ALPHA__SIZE 0x00000002 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f +#define NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4) +#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV10TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900 +#define 
NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 
0x0e000000 +#define NV10TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 +#define NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_RC_IN_RGB(x) (0x00000268+((x)*4)) +#define NV10TCL_RC_IN_RGB__SIZE 0x00000002 +#define NV10TCL_RC_IN_RGB_D_INPUT_SHIFT 0 +#define NV10TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f +#define NV10TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003 +#define NV10TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4) +#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV10TCL_RC_IN_RGB_D_MAPPING_SHIFT 5 +#define NV10TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0 +#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV10TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV10TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV10TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV10TCL_RC_IN_RGB_C_INPUT_SHIFT 8 +#define NV10TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300 +#define NV10TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 
0x00000000 +#define NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_SHIFT 13 +#define NV10TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SHIFT 16 +#define NV10TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000 +#define NV10TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_SHIFT 21 +#define NV10TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SHIFT 24 +#define NV10TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_SHIFT 29 +#define NV10TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define 
NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_RC_COLOR(x) (0x00000270+((x)*4)) +#define NV10TCL_RC_COLOR__SIZE 0x00000002 +#define NV10TCL_RC_COLOR_B_SHIFT 0 +#define NV10TCL_RC_COLOR_B_MASK 0x000000ff +#define NV10TCL_RC_COLOR_G_SHIFT 8 +#define NV10TCL_RC_COLOR_G_MASK 0x0000ff00 +#define NV10TCL_RC_COLOR_R_SHIFT 16 +#define NV10TCL_RC_COLOR_R_MASK 0x00ff0000 +#define NV10TCL_RC_COLOR_A_SHIFT 24 +#define NV10TCL_RC_COLOR_A_MASK 0xff000000 +#define NV10TCL_RC_OUT_ALPHA(x) (0x00000278+((x)*4)) +#define NV10TCL_RC_OUT_ALPHA__SIZE 0x00000002 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV10TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define 
NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12) +#define NV10TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13) +#define NV10TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14) +#define NV10TCL_RC_OUT_ALPHA_BIAS (1 << 15) +#define NV10TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SHIFT 17 +#define NV10TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV10TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV10TCL_RC_OUT_RGB(x) (0x00000280+((x)*4)) +#define NV10TCL_RC_OUT_RGB__SIZE 0x00000002 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV10TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 
12) +#define NV10TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13) +#define NV10TCL_RC_OUT_RGB_MUX_SUM (1 << 14) +#define NV10TCL_RC_OUT_RGB_BIAS (1 << 15) +#define NV10TCL_RC_OUT_RGB_BIAS_NONE 0x00000000 +#define NV10TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV10TCL_RC_OUT_RGB_SCALE_SHIFT 17 +#define NV10TCL_RC_OUT_RGB_SCALE_MASK 0x00000000 +#define NV10TCL_RC_OUT_RGB_SCALE_NONE 0x00000000 +#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV10TCL_RC_OUT_RGB_OPERATION_SHIFT 27 +#define NV10TCL_RC_OUT_RGB_OPERATION_MASK 0x38000000 +#define NV10TCL_RC_FINAL0 0x00000288 +#define NV10TCL_RC_FINAL0_D_INPUT_SHIFT 0 +#define NV10TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f +#define NV10TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV10TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV10TCL_RC_FINAL0_D_INPUT_FOG 0x00000003 +#define NV10TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV10TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV10TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c +#define NV10TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d +#define NV10TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV10TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4) +#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV10TCL_RC_FINAL0_D_MAPPING_SHIFT 5 +#define NV10TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0 +#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV10TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV10TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV10TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV10TCL_RC_FINAL0_C_INPUT_SHIFT 8 +#define NV10TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_FINAL0_C_INPUT_FOG 0x00000300 +#define NV10TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_FINAL0_C_MAPPING_SHIFT 13 +#define NV10TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define 
NV10TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_FINAL0_B_INPUT_SHIFT 16 +#define NV10TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_FINAL0_B_INPUT_FOG 0x00030000 +#define NV10TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_FINAL0_B_MAPPING_SHIFT 21 +#define NV10TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV10TCL_RC_FINAL0_A_INPUT_SHIFT 24 +#define NV10TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_FINAL0_A_INPUT_FOG 0x03000000 +#define NV10TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_SHIFT 29 +#define NV10TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define 
NV10TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_RC_FINAL1 0x0000028c +#define NV10TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7) +#define NV10TCL_RC_FINAL1_G_INPUT_SHIFT 8 +#define NV10TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00 +#define NV10TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV10TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV10TCL_RC_FINAL1_G_INPUT_FOG 0x00000300 +#define NV10TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV10TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800 +#define NV10TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900 +#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00 +#define NV10TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00 +#define NV10TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV10TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12) +#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV10TCL_RC_FINAL1_G_MAPPING_SHIFT 13 +#define NV10TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000 +#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV10TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV10TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV10TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV10TCL_RC_FINAL1_F_INPUT_SHIFT 16 +#define NV10TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000 +#define NV10TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV10TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV10TCL_RC_FINAL1_F_INPUT_FOG 0x00030000 +#define NV10TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV10TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000 +#define NV10TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000 +#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000 +#define NV10TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000 +#define NV10TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV10TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20) +#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV10TCL_RC_FINAL1_F_MAPPING_SHIFT 21 +#define NV10TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000 +#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV10TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV10TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV10TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define 
NV10TCL_RC_FINAL1_E_INPUT_SHIFT 24 +#define NV10TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000 +#define NV10TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000 +#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV10TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV10TCL_RC_FINAL1_E_INPUT_FOG 0x03000000 +#define NV10TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000 +#define NV10TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000 +#define NV10TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV10TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28) +#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 +#define NV10TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_SHIFT 29 +#define NV10TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV10TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV10TCL_LIGHT_MODEL 0x00000294 +#define NV10TCL_LIGHT_MODEL_COLOR_CONTROL (1 << 1) +#define NV10TCL_LIGHT_MODEL_LOCAL_VIEWER (1 << 16) +#define NV10TCL_COLOR_MATERIAL_ENABLE 0x00000298 +#define NV10TCL_COLOR_MATERIAL_ENABLE_SPECULAR (1 << 0) +#define NV10TCL_COLOR_MATERIAL_ENABLE_DIFFUSE (1 << 1) +#define NV10TCL_COLOR_MATERIAL_ENABLE_AMBIENT (1 << 2) +#define NV10TCL_COLOR_MATERIAL_ENABLE_EMISSION (1 << 3) +#define NV10TCL_FOG_MODE 0x0000029c +#define NV10TCL_FOG_MODE_EXP 0x00000800 +#define NV10TCL_FOG_MODE_EXP_2 0x00000802 +#define NV10TCL_FOG_MODE_EXP2 0x00000803 +#define NV10TCL_FOG_MODE_LINEAR 0x00000804 +#define NV10TCL_FOG_MODE_LINEAR_2 0x00002601 +#define NV10TCL_FOG_COORD_DIST 0x000002a0 +#define NV10TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000 +#define NV10TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001 +#define NV10TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002 +#define NV10TCL_FOG_COORD_DIST_COORD_FOG 0x00000003 +#define NV10TCL_FOG_ENABLE 0x000002a4 +#define NV10TCL_FOG_COLOR 0x000002a8 +#define NV10TCL_FOG_COLOR_R_SHIFT 0 +#define NV10TCL_FOG_COLOR_R_MASK 0x000000ff +#define NV10TCL_FOG_COLOR_G_SHIFT 8 +#define NV10TCL_FOG_COLOR_G_MASK 0x0000ff00 +#define NV10TCL_FOG_COLOR_B_SHIFT 16 +#define NV10TCL_FOG_COLOR_B_MASK 0x00ff0000 +#define NV10TCL_FOG_COLOR_A_SHIFT 24 +#define NV10TCL_FOG_COLOR_A_MASK 0xff000000 +#define NV10TCL_VIEWPORT_CLIP_MODE 0x000002b4 +#define NV10TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4)) +#define NV10TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_SHIFT 0 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_MASK 0x000007ff +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_LEFT_ENABLE (1 << 11) +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_SHIFT 16 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_MASK 0x07ff0000 +#define NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_RIGHT_ENABLE (1 << 27) 
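
The VIEWPORT_CLIP_HORIZ defines just above follow the SHIFT/MASK convention used throughout this header: an 11-bit left bound in bits 0..10, an 11-bit right bound in bits 16..26, and separate enable bits at 11 and 27. As a minimal sketch of how such a method word could be assembled (the helper name is hypothetical and not part of this header or driver; it assumes <stdint.h> and the defines above are in scope):

#include <stdint.h>

static inline uint32_t
nv10_viewport_clip_horiz(uint32_t left, uint32_t right)
{
   /* Pack the left/right clip bounds into their bit fields; the caller
    * can OR in NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_LEFT/RIGHT_ENABLE as needed. */
   return ((left << NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_SHIFT) &
           NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_L_MASK) |
          ((right << NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_SHIFT) &
           NV10TCL_VIEWPORT_CLIP_HORIZ_CLIP_R_MASK);
}
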
+#define NV10TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4)) +#define NV10TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_SHIFT 0 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_T_MASK 0x000007ff +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_TOP_ENABLE (1 << 11) +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_SHIFT 16 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_B_MASK 0x07ff0000 +#define NV10TCL_VIEWPORT_CLIP_VERT_CLIP_BOTTOM_ENABLE (1 << 27) +#define NV10TCL_ALPHA_FUNC_ENABLE 0x00000300 +#define NV10TCL_BLEND_FUNC_ENABLE 0x00000304 +#define NV10TCL_CULL_FACE_ENABLE 0x00000308 +#define NV10TCL_DEPTH_TEST_ENABLE 0x0000030c +#define NV10TCL_DITHER_ENABLE 0x00000310 +#define NV10TCL_LIGHTING_ENABLE 0x00000314 +#define NV10TCL_POINT_PARAMETERS_ENABLE 0x00000318 +#define NV10TCL_POINT_SMOOTH_ENABLE 0x0000031c +#define NV10TCL_LINE_SMOOTH_ENABLE 0x00000320 +#define NV10TCL_POLYGON_SMOOTH_ENABLE 0x00000324 +#define NV10TCL_VERTEX_WEIGHT_ENABLE 0x00000328 +#define NV10TCL_STENCIL_ENABLE 0x0000032c +#define NV10TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330 +#define NV10TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334 +#define NV10TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338 +#define NV10TCL_ALPHA_FUNC_FUNC 0x0000033c +#define NV10TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV10TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV10TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV10TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV10TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV10TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 +#define NV10TCL_ALPHA_FUNC_REF 0x00000340 +#define NV10TCL_BLEND_FUNC_SRC 0x00000344 +#define NV10TCL_BLEND_FUNC_SRC_ZERO 0x00000000 +#define NV10TCL_BLEND_FUNC_SRC_ONE 0x00000001 +#define NV10TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV10TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV10TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307 +#define NV10TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308 +#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV10TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003 +#define NV10TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV10TCL_BLEND_FUNC_DST 0x00000348 +#define NV10TCL_BLEND_FUNC_DST_ZERO 0x00000000 +#define NV10TCL_BLEND_FUNC_DST_ONE 0x00000001 +#define NV10TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV10TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV10TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307 +#define NV10TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308 +#define NV10TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV10TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003 +#define NV10TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 
0x00008004 +#define NV10TCL_BLEND_COLOR 0x0000034c +#define NV10TCL_BLEND_COLOR_B_SHIFT 0 +#define NV10TCL_BLEND_COLOR_B_MASK 0x000000ff +#define NV10TCL_BLEND_COLOR_G_SHIFT 8 +#define NV10TCL_BLEND_COLOR_G_MASK 0x0000ff00 +#define NV10TCL_BLEND_COLOR_R_SHIFT 16 +#define NV10TCL_BLEND_COLOR_R_MASK 0x00ff0000 +#define NV10TCL_BLEND_COLOR_A_SHIFT 24 +#define NV10TCL_BLEND_COLOR_A_MASK 0xff000000 +#define NV10TCL_BLEND_EQUATION 0x00000350 +#define NV10TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV10TCL_BLEND_EQUATION_MIN 0x00008007 +#define NV10TCL_BLEND_EQUATION_MAX 0x00008008 +#define NV10TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV10TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV10TCL_DEPTH_FUNC 0x00000354 +#define NV10TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV10TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV10TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV10TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV10TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV10TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV10TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV10TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV10TCL_COLOR_MASK 0x00000358 +#define NV10TCL_COLOR_MASK_B (1 << 0) +#define NV10TCL_COLOR_MASK_G (1 << 8) +#define NV10TCL_COLOR_MASK_R (1 << 16) +#define NV10TCL_COLOR_MASK_A (1 << 24) +#define NV10TCL_DEPTH_WRITE_ENABLE 0x0000035c +#define NV10TCL_STENCIL_MASK 0x00000360 +#define NV10TCL_STENCIL_FUNC_FUNC 0x00000364 +#define NV10TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200 +#define NV10TCL_STENCIL_FUNC_FUNC_LESS 0x00000201 +#define NV10TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202 +#define NV10TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 +#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV10TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV10TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 +#define NV10TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 +#define NV10TCL_STENCIL_FUNC_REF 0x00000368 +#define NV10TCL_STENCIL_FUNC_MASK 0x0000036c +#define NV10TCL_STENCIL_OP_FAIL 0x00000370 +#define NV10TCL_STENCIL_OP_FAIL_ZERO 0x00000000 +#define NV10TCL_STENCIL_OP_FAIL_INVERT 0x0000150a +#define NV10TCL_STENCIL_OP_FAIL_KEEP 0x00001e00 +#define NV10TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01 +#define NV10TCL_STENCIL_OP_FAIL_INCR 0x00001e02 +#define NV10TCL_STENCIL_OP_FAIL_DECR 0x00001e03 +#define NV10TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 +#define NV10TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 +#define NV10TCL_STENCIL_OP_ZFAIL 0x00000374 +#define NV10TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000 +#define NV10TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a +#define NV10TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00 +#define NV10TCL_STENCIL_OP_ZFAIL_REPLACE 0x00001e01 +#define NV10TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02 +#define NV10TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03 +#define NV10TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV10TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV10TCL_STENCIL_OP_ZPASS 0x00000378 +#define NV10TCL_STENCIL_OP_ZPASS_ZERO 0x00000000 +#define NV10TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a +#define NV10TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00 +#define NV10TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01 +#define NV10TCL_STENCIL_OP_ZPASS_INCR 0x00001e02 +#define NV10TCL_STENCIL_OP_ZPASS_DECR 0x00001e03 +#define NV10TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV10TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV10TCL_SHADE_MODEL 0x0000037c +#define NV10TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV10TCL_SHADE_MODEL_SMOOTH 
0x00001d01 +#define NV10TCL_LINE_WIDTH 0x00000380 +#define NV10TCL_POLYGON_OFFSET_FACTOR 0x00000384 +#define NV10TCL_POLYGON_OFFSET_UNITS 0x00000388 +#define NV10TCL_POLYGON_MODE_FRONT 0x0000038c +#define NV10TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV10TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV10TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV10TCL_POLYGON_MODE_BACK 0x00000390 +#define NV10TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV10TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV10TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV10TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV10TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV10TCL_CULL_FACE 0x0000039c +#define NV10TCL_CULL_FACE_FRONT 0x00000404 +#define NV10TCL_CULL_FACE_BACK 0x00000405 +#define NV10TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV10TCL_FRONT_FACE 0x000003a0 +#define NV10TCL_FRONT_FACE_CW 0x00000900 +#define NV10TCL_FRONT_FACE_CCW 0x00000901 +#define NV10TCL_NORMALIZE_ENABLE 0x000003a4 +#define NV10TCL_COLOR_MATERIAL_R 0x000003a8 +#define NV10TCL_COLOR_MATERIAL_G 0x000003ac +#define NV10TCL_COLOR_MATERIAL_B 0x000003b0 +#define NV10TCL_COLOR_MATERIAL_A 0x000003b4 +#define NV10TCL_COLOR_CONTROL 0x000003b8 +#define NV10TCL_ENABLED_LIGHTS 0x000003bc +#define NV10TCL_ENABLED_LIGHTS_LIGHT0 (1 << 0) +#define NV10TCL_ENABLED_LIGHTS_LIGHT1 (1 << 2) +#define NV10TCL_ENABLED_LIGHTS_LIGHT2 (1 << 4) +#define NV10TCL_ENABLED_LIGHTS_LIGHT3 (1 << 6) +#define NV10TCL_ENABLED_LIGHTS_LIGHT4 (1 << 8) +#define NV10TCL_ENABLED_LIGHTS_LIGHT5 (1 << 10) +#define NV10TCL_ENABLED_LIGHTS_LIGHT6 (1 << 12) +#define NV10TCL_ENABLED_LIGHTS_LIGHT7 (1 << 14) +#define NV10TCL_TX_GEN_S(x) (0x000003c0+((x)*16)) +#define NV10TCL_TX_GEN_S__SIZE 0x00000002 +#define NV10TCL_TX_GEN_S_FALSE 0x00000000 +#define NV10TCL_TX_GEN_S_EYE_LINEAR 0x00002400 +#define NV10TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401 +#define NV10TCL_TX_GEN_S_SPHERE_MAP 0x00002402 +#define NV10TCL_TX_GEN_S_NORMAL_MAP 0x00008511 +#define NV10TCL_TX_GEN_S_REFLECTION_MAP 0x00008512 +#define NV10TCL_TX_GEN_T(x) (0x000003c4+((x)*16)) +#define NV10TCL_TX_GEN_T__SIZE 0x00000002 +#define NV10TCL_TX_GEN_T_FALSE 0x00000000 +#define NV10TCL_TX_GEN_T_EYE_LINEAR 0x00002400 +#define NV10TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401 +#define NV10TCL_TX_GEN_T_SPHERE_MAP 0x00002402 +#define NV10TCL_TX_GEN_T_NORMAL_MAP 0x00008511 +#define NV10TCL_TX_GEN_T_REFLECTION_MAP 0x00008512 +#define NV10TCL_TX_GEN_R(x) (0x000003c8+((x)*16)) +#define NV10TCL_TX_GEN_R__SIZE 0x00000002 +#define NV10TCL_TX_GEN_R_FALSE 0x00000000 +#define NV10TCL_TX_GEN_R_EYE_LINEAR 0x00002400 +#define NV10TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401 +#define NV10TCL_TX_GEN_R_SPHERE_MAP 0x00002402 +#define NV10TCL_TX_GEN_R_NORMAL_MAP 0x00008511 +#define NV10TCL_TX_GEN_R_REFLECTION_MAP 0x00008512 +#define NV10TCL_TX_GEN_Q(x) (0x000003cc+((x)*16)) +#define NV10TCL_TX_GEN_Q__SIZE 0x00000002 +#define NV10TCL_TX_GEN_Q_FALSE 0x00000000 +#define NV10TCL_TX_GEN_Q_EYE_LINEAR 0x00002400 +#define NV10TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401 +#define NV10TCL_TX_GEN_Q_SPHERE_MAP 0x00002402 +#define NV10TCL_TX_GEN_Q_NORMAL_MAP 0x00008511 +#define NV10TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512 +#define NV10TCL_TX_MATRIX_ENABLE(x) (0x000003e0+((x)*4)) +#define NV10TCL_TX_MATRIX_ENABLE__SIZE 0x00000002 +#define NV10TCL_VIEW_MATRIX_ENABLE 0x000003e8 +#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW1 (1 << 0) +#define NV10TCL_VIEW_MATRIX_ENABLE_MODELVIEW0 (1 << 1) +#define NV10TCL_VIEW_MATRIX_ENABLE_PROJECTION (1 << 2) +#define NV10TCL_POINT_SIZE 0x000003ec +#define 
NV10TCL_MODELVIEW0_MATRIX(x) (0x00000400+((x)*4)) +#define NV10TCL_MODELVIEW0_MATRIX__SIZE 0x00000010 +#define NV10TCL_MODELVIEW1_MATRIX(x) (0x00000440+((x)*4)) +#define NV10TCL_MODELVIEW1_MATRIX__SIZE 0x00000010 +#define NV10TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4)) +#define NV10TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010 +#define NV10TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4)) +#define NV10TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010 +#define NV10TCL_PROJECTION_MATRIX(x) (0x00000500+((x)*4)) +#define NV10TCL_PROJECTION_MATRIX__SIZE 0x00000010 +#define NV10TCL_TX0_MATRIX(x) (0x00000540+((x)*4)) +#define NV10TCL_TX0_MATRIX__SIZE 0x00000010 +#define NV10TCL_TX1_MATRIX(x) (0x00000580+((x)*4)) +#define NV10TCL_TX1_MATRIX__SIZE 0x00000010 +#define NV10TCL_CLIP_PLANE_A(x) (0x00000600+((x)*16)) +#define NV10TCL_CLIP_PLANE_A__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_B(x) (0x00000604+((x)*16)) +#define NV10TCL_CLIP_PLANE_B__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_C(x) (0x00000608+((x)*16)) +#define NV10TCL_CLIP_PLANE_C__SIZE 0x00000008 +#define NV10TCL_CLIP_PLANE_D(x) (0x0000060c+((x)*16)) +#define NV10TCL_CLIP_PLANE_D__SIZE 0x00000008 +#define NV10TCL_FOG_EQUATION_CONSTANT 0x00000680 +#define NV10TCL_FOG_EQUATION_LINEAR 0x00000684 +#define NV10TCL_FOG_EQUATION_QUADRATIC 0x00000688 +#define NV10TCL_FRONT_MATERIAL_SHININESS(x) (0x000006a0+((x)*4)) +#define NV10TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000006c4 +#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000006c8 +#define NV10TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000006cc +#define NV10TCL_VIEWPORT_SCALE_X 0x000006e8 +#define NV10TCL_VIEWPORT_SCALE_Y 0x000006ec +#define NV10TCL_VIEWPORT_SCALE_Z 0x000006f0 +#define NV10TCL_VIEWPORT_SCALE_W 0x000006f4 +#define NV10TCL_POINT_PARAMETER(x) (0x000006f8+((x)*4)) +#define NV10TCL_POINT_PARAMETER__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00000800+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00000804+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00000808+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000080c+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00000810+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00000814+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00000818+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000081c+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00000820+((x)*128)) +#define NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_HALF_VECTOR_X(x) (0x00000828+((x)*128)) +#define NV10TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000082c+((x)*128)) +#define NV10TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 
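
The per-light state above is laid out as parameterized method macros with a 128-byte stride per light: NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) expands to 0x00000800 + x*128, so light 1's ambient red method is 0x00000880 and light 7's is 0x00000b80. A small illustrative program (hypothetical, not part of the driver) that simply prints the eight per-light offsets, assuming the defines above are visible:

#include <stdio.h>

int main(void)
{
   unsigned i;
   /* __SIZE is 8: one entry per hardware light */
   for (i = 0; i < NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE; i++)
      printf("light %u: AMBIENT_R method 0x%08x\n",
             i, NV10TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(i));
   return 0;
}
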
+#define NV10TCL_LIGHT_HALF_VECTOR_Z(x) (0x00000830+((x)*128)) +#define NV10TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_DIRECTION_X(x) (0x00000834+((x)*128)) +#define NV10TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_DIRECTION_Y(x) (0x00000838+((x)*128)) +#define NV10TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_DIRECTION_Z(x) (0x0000083c+((x)*128)) +#define NV10TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00000840+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00000844+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00000848+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_DIR_X(x) (0x0000084c+((x)*128)) +#define NV10TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_DIR_Y(x) (0x00000850+((x)*128)) +#define NV10TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_DIR_Z(x) (0x00000854+((x)*128)) +#define NV10TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00000858+((x)*128)) +#define NV10TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008 +#define NV10TCL_LIGHT_POSITION_X(x) (0x0000085c+((x)*128)) +#define NV10TCL_LIGHT_POSITION_X__SIZE 0x00000008 +#define NV10TCL_LIGHT_POSITION_Y(x) (0x00000860+((x)*128)) +#define NV10TCL_LIGHT_POSITION_Y__SIZE 0x00000008 +#define NV10TCL_LIGHT_POSITION_Z(x) (0x00000864+((x)*128)) +#define NV10TCL_LIGHT_POSITION_Z__SIZE 0x00000008 +#define NV10TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00000868+((x)*128)) +#define NV10TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008 +#define NV10TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000086c+((x)*128)) +#define NV10TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008 +#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00000870+((x)*128)) +#define NV10TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 +#define NV10TCL_VERTEX_POS_3F_X 0x00000c00 +#define NV10TCL_VERTEX_POS_3F_Y 0x00000c04 +#define NV10TCL_VERTEX_POS_3F_Z 0x00000c08 +#define NV10TCL_VERTEX_POS_4F_X 0x00000c18 +#define NV10TCL_VERTEX_POS_4F_Y 0x00000c1c +#define NV10TCL_VERTEX_POS_4F_Z 0x00000c20 +#define NV10TCL_VERTEX_POS_4F_W 0x00000c24 +#define NV10TCL_VERTEX_NOR_3F_X 0x00000c30 +#define NV10TCL_VERTEX_NOR_3F_Y 0x00000c34 +#define NV10TCL_VERTEX_NOR_3F_Z 0x00000c38 +#define NV10TCL_VERTEX_NOR_3I_XY 0x00000c40 +#define NV10TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 +#define NV10TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff +#define NV10TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 +#define NV10TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 +#define NV10TCL_VERTEX_NOR_3I_Z 0x00000c44 +#define NV10TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 +#define NV10TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff +#define NV10TCL_VERTEX_COL_4F_R 0x00000c50 +#define NV10TCL_VERTEX_COL_4F_G 0x00000c54 +#define NV10TCL_VERTEX_COL_4F_B 0x00000c58 +#define NV10TCL_VERTEX_COL_4F_A 0x00000c5c +#define NV10TCL_VERTEX_COL_3F_R 0x00000c60 +#define NV10TCL_VERTEX_COL_3F_G 0x00000c64 +#define NV10TCL_VERTEX_COL_3F_B 0x00000c68 +#define NV10TCL_VERTEX_COL_4I 0x00000c6c +#define NV10TCL_VERTEX_COL_4I_R_SHIFT 0 +#define NV10TCL_VERTEX_COL_4I_R_MASK 0x000000ff +#define NV10TCL_VERTEX_COL_4I_G_SHIFT 8 +#define NV10TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 +#define NV10TCL_VERTEX_COL_4I_B_SHIFT 16 +#define NV10TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 +#define NV10TCL_VERTEX_COL_4I_A_SHIFT 24 +#define NV10TCL_VERTEX_COL_4I_A_MASK 0xff000000 +#define NV10TCL_VERTEX_COL2_3F_R 0x00000c80 
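
VERTEX_COL_4I above carries a whole RGBA color in a single method word, one byte per channel as given by its SHIFT/MASK pairs. A minimal packing sketch (hypothetical helper, assuming <stdint.h> and the defines above):

#include <stdint.h>

static inline uint32_t
nv10_vertex_col_4i(uint8_t r, uint8_t g, uint8_t b, uint8_t a)
{
   /* R in bits 0..7, G in 8..15, B in 16..23, A in 24..31 */
   return ((uint32_t)r << NV10TCL_VERTEX_COL_4I_R_SHIFT) |
          ((uint32_t)g << NV10TCL_VERTEX_COL_4I_G_SHIFT) |
          ((uint32_t)b << NV10TCL_VERTEX_COL_4I_B_SHIFT) |
          ((uint32_t)a << NV10TCL_VERTEX_COL_4I_A_SHIFT);
}
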
+#define NV10TCL_VERTEX_COL2_3F_G 0x00000c84 +#define NV10TCL_VERTEX_COL2_3F_B 0x00000c88 +#define NV10TCL_VERTEX_COL2_3I 0x00000c8c +#define NV10TCL_VERTEX_COL2_3I_R_SHIFT 0 +#define NV10TCL_VERTEX_COL2_3I_R_MASK 0x000000ff +#define NV10TCL_VERTEX_COL2_3I_G_SHIFT 8 +#define NV10TCL_VERTEX_COL2_3I_G_MASK 0x0000ff00 +#define NV10TCL_VERTEX_COL2_3I_B_SHIFT 16 +#define NV10TCL_VERTEX_COL2_3I_B_MASK 0x00ff0000 +#define NV10TCL_VERTEX_TX0_2F_S 0x00000c90 +#define NV10TCL_VERTEX_TX0_2F_T 0x00000c94 +#define NV10TCL_VERTEX_TX0_2I 0x00000c98 +#define NV10TCL_VERTEX_TX0_2I_S_SHIFT 0 +#define NV10TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX0_2I_T_SHIFT 16 +#define NV10TCL_VERTEX_TX0_2I_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX0_4F_S 0x00000ca0 +#define NV10TCL_VERTEX_TX0_4F_T 0x00000ca4 +#define NV10TCL_VERTEX_TX0_4F_R 0x00000ca8 +#define NV10TCL_VERTEX_TX0_4F_Q 0x00000cac +#define NV10TCL_VERTEX_TX0_4I_ST 0x00000cb0 +#define NV10TCL_VERTEX_TX0_4I_ST_S_SHIFT 0 +#define NV10TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX0_4I_ST_T_SHIFT 16 +#define NV10TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX0_4I_RQ 0x00000cb4 +#define NV10TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0 +#define NV10TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16 +#define NV10TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX1_2F_S 0x00000cb8 +#define NV10TCL_VERTEX_TX1_2F_T 0x00000cbc +#define NV10TCL_VERTEX_TX1_2I 0x00000cc0 +#define NV10TCL_VERTEX_TX1_2I_S_SHIFT 0 +#define NV10TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX1_2I_T_SHIFT 16 +#define NV10TCL_VERTEX_TX1_2I_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX1_4F_S 0x00000cc8 +#define NV10TCL_VERTEX_TX1_4F_T 0x00000ccc +#define NV10TCL_VERTEX_TX1_4F_R 0x00000cd0 +#define NV10TCL_VERTEX_TX1_4F_Q 0x00000cd4 +#define NV10TCL_VERTEX_TX1_4I_ST 0x00000cd8 +#define NV10TCL_VERTEX_TX1_4I_ST_S_SHIFT 0 +#define NV10TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX1_4I_ST_T_SHIFT 16 +#define NV10TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000 +#define NV10TCL_VERTEX_TX1_4I_RQ 0x00000cdc +#define NV10TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0 +#define NV10TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff +#define NV10TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16 +#define NV10TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000 +#define NV10TCL_VERTEX_FOG_1F 0x00000ce0 +#define NV10TCL_VERTEX_WGH_1F 0x00000ce4 +#define NV10TCL_EDGEFLAG_ENABLE 0x00000cec +#define NV10TCL_VERTEX_ARRAY_VALIDATE 0x00000cf0 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(x) (0x00000d00+((x)*8)) +#define NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET__SIZE 0x00000008 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(x) (0x00000d04+((x)*8)) +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT__SIZE 0x00000008 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_POS 0x00000d00 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS 0x00000d04 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_SHIFT 8 +#define 
NV10TCL_VERTEX_ARRAY_FORMAT_POS_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_COL 0x00000d08 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL 0x00000d0c +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_COL2 0x00000d10 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2 0x00000d14 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_COL2_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_TX0 0x00000d18 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0 0x00000d1c +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX0_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_TX1 0x00000d20 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1 0x00000d24 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_TX1_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_NOR 0x00000d28 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR 0x00000d2c +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_NOR_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_WGH 0x00000d30 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH 0x00000d34 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_WGH_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_ARRAY_OFFSET_FOG 0x00000d38 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG 0x00000d3c +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_SHIFT 0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_TYPE_MASK 0x0000000f +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_SHIFT 4 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_FIELDS_MASK 0x000000f0 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_SHIFT 8 +#define NV10TCL_VERTEX_ARRAY_FORMAT_FOG_STRIDE_MASK 0x0000ff00 +#define NV10TCL_VERTEX_BEGIN_END 0x00000dfc +#define NV10TCL_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV10TCL_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV10TCL_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV10TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define 
NV10TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV10TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV10TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV10TCL_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV10TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV10TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a +#define NV10TCL_VB_ELEMENT_U16 0x00000e00 +#define NV10TCL_VB_ELEMENT_U16_I0_SHIFT 0 +#define NV10TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff +#define NV10TCL_VB_ELEMENT_U16_I1_SHIFT 16 +#define NV10TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 +#define NV10TCL_VB_ELEMENT_U32 0x00001100 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END 0x000013fc +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_STOP 0x00000000 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POINTS 0x00000001 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINES 0x00000002 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_LOOP 0x00000003 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_LINE_STRIP 0x00000004 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLES 0x00000005 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUADS 0x00000008 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV10TCL_VERTEX_BUFFER_BEGIN_END_POLYGON 0x0000000a +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS 0x00001400 +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_SHIFT 0 +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_FIRST_MASK 0x0000ffff +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_SHIFT 24 +#define NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS_LAST_MASK 0xff000000 +#define NV10TCL_VERTEX_ARRAY_DATA 0x00001800 + + +#define NV04_CONTEXT_COLOR_KEY 0x00000057 + + + +#define NV03_CONTEXT_SURFACES_2D 0x00000058 + +#define NV03_CONTEXT_SURFACES_2D_SYNCHRONIZE 0x00000100 +#define NV03_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180 +#define NV03_CONTEXT_SURFACES_2D_DMA_SOURCE 0x00000184 +#define NV03_CONTEXT_SURFACES_2D_DMA_DESTIN 0x00000188 +#define NV03_CONTEXT_SURFACES_2D_COLOR_FORMAT 0x00000300 +#define NV03_CONTEXT_SURFACES_2D_PITCH 0x00000304 +#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_SHIFT 0 +#define NV03_CONTEXT_SURFACES_2D_PITCH_SOURCE_MASK 0x0000ffff +#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_SHIFT 16 +#define NV03_CONTEXT_SURFACES_2D_PITCH_DESTIN_MASK 0xffff0000 +#define NV03_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308 +#define NV03_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c + + +#define NV03_CONTEXT_SURFACES_3D 0x0000005a + +#define NV03_CONTEXT_SURFACES_3D_SYNCHRONIZE 0x00000100 +#define NV03_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180 +#define NV03_CONTEXT_SURFACES_3D_DMA_SURFACE 0x00000184 +#define NV03_CONTEXT_SURFACES_3D_PITCH 0x00000300 +#define NV03_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x00000304 +#define NV03_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000308 + + +#define NV04_RENDER_SOLID_LINE 0x0000005c + +#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198 + + +#define NV04_RENDER_SOLID_TRIANGLE 0x0000005d + + + +#define NV04_RENDER_SOLID_RECTANGLE 0x0000005e + +#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198 + + +#define NV04_IMAGE_BLIT 0x0000005f + +#define NV04_IMAGE_BLIT_NOP 0x00000100 +#define NV04_IMAGE_BLIT_NOTIFY 0x00000104 +#define NV04_IMAGE_BLIT_DMA_NOTIFY 0x00000180 +#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184 +#define NV04_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188 +#define NV04_IMAGE_BLIT_PATTERN 0x0000018c +#define NV04_IMAGE_BLIT_ROP 0x00000190 +#define NV04_IMAGE_BLIT_BETA4 
0x00000198 +#define NV04_IMAGE_BLIT_SURFACE 0x0000019c +#define NV04_IMAGE_BLIT_OPERATION 0x000002fc +#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001 +#define NV04_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002 +#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003 +#define NV04_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005 + + +#define NV04_INDEXED_IMAGE_FROM_CPU 0x00000060 + +#define NV04_INDEXED_IMAGE_FROM_CPU_NOP 0x00000100 +#define NV04_INDEXED_IMAGE_FROM_CPU_NOTIFY 0x00000104 +#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c +#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184 +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8 +#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec +#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0 +#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4 +#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8 +#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc +#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR 0x00000400 + + +#define NV04_IMAGE_FROM_CPU 0x00000061 + +#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198 +#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c + + +#define NV10_CONTEXT_SURFACES_2D 0x00000062 + + + +#define NV05_SCALED_IMAGE_FROM_MEMORY 0x00000063 + +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 +#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 + + +#define NV01_IMAGE_SRCCOPY_AND 0x00000064 + +#define NV01_IMAGE_SRCCOPY_AND_NOTIFY 0x00000104 +#define NV01_IMAGE_SRCCOPY_AND_DMA_NOTIFY 0x00000180 +#define NV01_IMAGE_SRCCOPY_AND_IMAGE_OUTPUT 0x00000200 +#define NV01_IMAGE_SRCCOPY_AND_IMAGE_INPUT 0x00000204 + + +#define NV05_INDEXED_IMAGE_FROM_CPU 0x00000064 + +#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188 +#define NV05_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c +#define NV05_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190 +#define NV05_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194 +#define NV05_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198 +#define NV05_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c +#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0 +#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0 +#define NV05_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4 +#define NV05_INDEXED_IMAGE_FROM_CPU_INDICES 0x00000400 + + +#define NV05_IMAGE_FROM_CPU 0x00000065 + +#define NV05_IMAGE_FROM_CPU_BETA4 0x00000198 +#define NV05_IMAGE_FROM_CPU_SURFACE 0x0000019c + + +#define NV05_STRETCHED_IMAGE_FROM_CPU 0x00000066 + +#define NV05_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194 +#define NV05_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198 +#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 + + +#define NV04_IMAGE_BLEND_PREMULT 0x00000067 + +#define NV04_IMAGE_BLEND_PREMULT_NOP 0x00000100 +#define NV04_IMAGE_BLEND_PREMULT_NOTIFY 0x00000104 +#define NV04_IMAGE_BLEND_PREMULT_DMA_NOTIFY 0x00000180 +#define NV04_IMAGE_BLEND_PREMULT_IMAGE_OUTPUT 0x00000200 +#define NV04_IMAGE_BLEND_PREMULT_BETA_INPUT 0x00000204 +#define NV04_IMAGE_BLEND_PREMULT_IMAGE_INPUT 0x00000208 + + +#define NV03_CHANNEL_PIO 0x0000006a + + + +#define NV03_CHANNEL_DMA 0x0000006b + + + +#define NV04_BETA_SOLID 0x00000072 + +#define NV04_BETA_SOLID_NOP 0x00000100 +#define 
NV04_BETA_SOLID_NOTIFY 0x00000104 +#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180 +#define NV04_BETA_SOLID_BETA_OUTPUT 0x00000200 +#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300 + + +#define NV04_STRETCHED_IMAGE_FROM_CPU 0x00000076 + + + +#define NV04_SCALED_IMAGE_FROM_MEMORY 0x00000077 + +#define NV04_SCALED_IMAGE_FROM_MEMORY_NOP 0x00000100 +#define NV04_SCALED_IMAGE_FROM_MEMORY_NOTIFY 0x00000104 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184 +#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188 +#define NV04_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c +#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190 +#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008 +#define NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_MASK 0xffff0000 +#define 
NV04_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318 +#define NV04_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_W_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_SIZE_H_MASK 0xffff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_MASK 0x00ff0000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_SHIFT 24 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_MASK 0xff000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000 +#define NV04_SCALED_IMAGE_FROM_MEMORY_ADDRESS 0x00000408 +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_SHIFT 0 +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_X_MASK 0x0000ffff +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_SHIFT 16 +#define NV04_SCALED_IMAGE_FROM_MEMORY_POINT_Y_MASK 0xffff0000 + + +#define NV10_TEXTURE_FROM_CPU 0x0000007b + +#define NV10_TEXTURE_FROM_CPU_NOP 0x00000100 +#define NV10_TEXTURE_FROM_CPU_NOTIFY 0x00000104 +#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108 +#define NV10_TEXTURE_FROM_CPU_PM_TRIGGER 0x00000140 +#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180 +#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184 +#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300 +#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304 +#define NV10_TEXTURE_FROM_CPU_POINT_X_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_POINT_X_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_POINT_Y_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_POINT_Y_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308 +#define NV10_TEXTURE_FROM_CPU_SIZE_W_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_SIZE_W_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_SIZE_H_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_SIZE_H_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_SHIFT 0 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y_MASK 0x0000ffff +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_SHIFT 16 +#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H_MASK 0xffff0000 +#define NV10_TEXTURE_FROM_CPU_COLOR(x) (0x00000400+((x)*4)) +#define NV10_TEXTURE_FROM_CPU_COLOR__SIZE 0x00000700 + + +#define NV10_VIDEO_DISPLAY 0x0000007c + + + +#define NV10_DVD_SUBPICTURE 0x00000088 + + + +#define NV10_SCALED_IMAGE_FROM_MEMORY 0x00000089 + +#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108 + + +#define NV10_IMAGE_FROM_CPU 0x0000008a + +#define NV10_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8 + + +#define NV10_CONTEXT_SURFACES_3D 0x00000093 + + + +#define NV10_DX5_TEXTURE_TRIANGLE 0x00000094 + + + +#define 
NV10_DX6_MULTI_TEXTURE_TRIANGLE 0x00000095 + + + +#define NV11TCL 0x00000096 + +#define NV11TCL_COLOR_LOGIC_OP_ENABLE 0x00000d40 +#define NV11TCL_COLOR_LOGIC_OP_OP 0x00000d44 +#define NV11TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV11TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV11TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV11TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV11TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV11TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV11TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV11TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV11TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV11TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV11TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV11TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV11TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV11TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV11TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV11TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f + + +#define NV20TCL 0x00000097 + +#define NV20TCL_NOP 0x00000100 +#define NV20TCL_NOTIFY 0x00000104 +#define NV20TCL_DMA_NOTIFY 0x00000180 +#define NV20TCL_DMA_TEXTURE0 0x00000184 +#define NV20TCL_DMA_TEXTURE1 0x00000188 +#define NV20TCL_DMA_COLOR 0x00000194 +#define NV20TCL_DMA_ZETA 0x00000198 +#define NV20TCL_DMA_VTXBUF0 0x0000019c +#define NV20TCL_DMA_VTXBUF1 0x000001a0 +#define NV20TCL_DMA_FENCE 0x000001a4 +#define NV20TCL_DMA_QUERY 0x000001a8 +#define NV20TCL_RT_HORIZ 0x00000200 +#define NV20TCL_RT_HORIZ_X_SHIFT 0 +#define NV20TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV20TCL_RT_HORIZ_W_SHIFT 16 +#define NV20TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV20TCL_RT_VERT 0x00000204 +#define NV20TCL_RT_VERT_Y_SHIFT 0 +#define NV20TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV20TCL_RT_VERT_H_SHIFT 16 +#define NV20TCL_RT_VERT_H_MASK 0xffff0000 +#define NV20TCL_RT_FORMAT 0x00000208 +#define NV20TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV20TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV20TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV20TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV20TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV20TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV20TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV20TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV20TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV20TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV20TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d +#define NV20TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV20TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV20TCL_RT_PITCH 0x0000020c +#define NV20TCL_RT_PITCH_COLOR_PITCH_SHIFT 0 +#define NV20TCL_RT_PITCH_COLOR_PITCH_MASK 0x0000ffff +#define NV20TCL_RT_PITCH_ZETA_PITCH_SHIFT 16 +#define NV20TCL_RT_PITCH_ZETA_PITCH_MASK 0xffff0000 +#define NV20TCL_COLOR_OFFSET 0x00000210 +#define NV20TCL_ZETA_OFFSET 0x00000214 +#define NV20TCL_RC_IN_ALPHA(x) (0x00000260+((x)*4)) +#define NV20TCL_RC_IN_ALPHA__SIZE 0x00000008 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f +#define NV20TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 
0x00000009 +#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c +#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d +#define NV20TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV20TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4) +#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 +#define NV20TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV20TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV20TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12) +#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 +#define NV20TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV20TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000 +#define 
NV20TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV20TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20) +#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 +#define NV20TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV20TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV20TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28) +#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 +#define NV20TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV20TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV20TCL_RC_FINAL0 0x00000288 +#define NV20TCL_RC_FINAL0_D_INPUT_SHIFT 0 +#define NV20TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f +#define NV20TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV20TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV20TCL_RC_FINAL0_D_INPUT_FOG 0x00000003 +#define NV20TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV20TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV20TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV20TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV20TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c +#define NV20TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d +#define NV20TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV20TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE 
(1 << 4) +#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV20TCL_RC_FINAL0_D_MAPPING_SHIFT 5 +#define NV20TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0 +#define NV20TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV20TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV20TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV20TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV20TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV20TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV20TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV20TCL_RC_FINAL0_C_INPUT_SHIFT 8 +#define NV20TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00 +#define NV20TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV20TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV20TCL_RC_FINAL0_C_INPUT_FOG 0x00000300 +#define NV20TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV20TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV20TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV20TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV20TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00 +#define NV20TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00 +#define NV20TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV20TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12) +#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV20TCL_RC_FINAL0_C_MAPPING_SHIFT 13 +#define NV20TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000 +#define NV20TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV20TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV20TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV20TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV20TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV20TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV20TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV20TCL_RC_FINAL0_B_INPUT_SHIFT 16 +#define NV20TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000 +#define NV20TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV20TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV20TCL_RC_FINAL0_B_INPUT_FOG 0x00030000 +#define NV20TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV20TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV20TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV20TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV20TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000 +#define NV20TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000 +#define NV20TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV20TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20) +#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV20TCL_RC_FINAL0_B_MAPPING_SHIFT 21 +#define NV20TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000 +#define NV20TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 
0x00000000 +#define NV20TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV20TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV20TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV20TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV20TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV20TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV20TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV20TCL_RC_FINAL0_A_INPUT_SHIFT 24 +#define NV20TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000 +#define NV20TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV20TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV20TCL_RC_FINAL0_A_INPUT_FOG 0x03000000 +#define NV20TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV20TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV20TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV20TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV20TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000 +#define NV20TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000 +#define NV20TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV20TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28) +#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_SHIFT 29 +#define NV20TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV20TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV20TCL_RC_FINAL1 0x0000028c +#define NV20TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7) +#define NV20TCL_RC_FINAL1_G_INPUT_SHIFT 8 +#define NV20TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00 +#define NV20TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV20TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV20TCL_RC_FINAL1_G_INPUT_FOG 0x00000300 +#define NV20TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV20TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV20TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800 +#define NV20TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900 +#define NV20TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00 +#define NV20TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00 +#define NV20TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV20TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12) +#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV20TCL_RC_FINAL1_G_MAPPING_SHIFT 13 +#define NV20TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000 +#define NV20TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV20TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV20TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 
0x00006000 +#define NV20TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV20TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV20TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV20TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV20TCL_RC_FINAL1_F_INPUT_SHIFT 16 +#define NV20TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000 +#define NV20TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV20TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV20TCL_RC_FINAL1_F_INPUT_FOG 0x00030000 +#define NV20TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV20TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV20TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000 +#define NV20TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000 +#define NV20TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000 +#define NV20TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000 +#define NV20TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV20TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20) +#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV20TCL_RC_FINAL1_F_MAPPING_SHIFT 21 +#define NV20TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000 +#define NV20TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV20TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV20TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV20TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV20TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV20TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV20TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV20TCL_RC_FINAL1_E_INPUT_SHIFT 24 +#define NV20TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000 +#define NV20TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV20TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV20TCL_RC_FINAL1_E_INPUT_FOG 0x03000000 +#define NV20TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV20TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV20TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000 +#define NV20TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000 +#define NV20TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000 +#define NV20TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000 +#define NV20TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV20TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28) +#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_SHIFT 29 +#define NV20TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV20TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define 
NV20TCL_LIGHT_CONTROL 0x00000294 +#define NV20TCL_FOG_MODE 0x0000029c +#define NV20TCL_FOG_MODE_EXP 0x00000800 +#define NV20TCL_FOG_MODE_EXP_2 0x00000802 +#define NV20TCL_FOG_MODE_EXP2 0x00000803 +#define NV20TCL_FOG_MODE_LINEAR 0x00000804 +#define NV20TCL_FOG_MODE_LINEAR_2 0x00002601 +#define NV20TCL_FOG_COORD_DIST 0x000002a0 +#define NV20TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000 +#define NV20TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001 +#define NV20TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002 +#define NV20TCL_FOG_COORD_DIST_COORD_FOG 0x00000003 +#define NV20TCL_FOG_ENABLE 0x000002a4 +#define NV20TCL_FOG_COLOR 0x000002a8 +#define NV20TCL_FOG_COLOR_R_SHIFT 0 +#define NV20TCL_FOG_COLOR_R_MASK 0x000000ff +#define NV20TCL_FOG_COLOR_G_SHIFT 8 +#define NV20TCL_FOG_COLOR_G_MASK 0x0000ff00 +#define NV20TCL_FOG_COLOR_B_SHIFT 16 +#define NV20TCL_FOG_COLOR_B_MASK 0x00ff0000 +#define NV20TCL_FOG_COLOR_A_SHIFT 24 +#define NV20TCL_FOG_COLOR_A_MASK 0xff000000 +#define NV20TCL_VIEWPORT_CLIP_MODE 0x000002b4 +#define NV20TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*4)) +#define NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV20TCL_VIEWPORT_CLIP_VERT(x) (0x000002e0+((x)*4)) +#define NV20TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV20TCL_ALPHA_FUNC_ENABLE 0x00000300 +#define NV20TCL_BLEND_FUNC_ENABLE 0x00000304 +#define NV20TCL_CULL_FACE_ENABLE 0x00000308 +#define NV20TCL_DEPTH_TEST_ENABLE 0x0000030c +#define NV20TCL_DITHER_ENABLE 0x00000310 +#define NV20TCL_LIGHTING_ENABLE 0x00000314 +#define NV20TCL_POINT_PARAMETERS_ENABLE 0x00000318 +#define NV20TCL_POINT_SMOOTH_ENABLE 0x0000031c +#define NV20TCL_LINE_SMOOTH_ENABLE 0x00000320 +#define NV20TCL_POLYGON_SMOOTH_ENABLE 0x00000324 +#define NV20TCL_STENCIL_ENABLE 0x0000032c +#define NV20TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000330 +#define NV20TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000334 +#define NV20TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000338 +#define NV20TCL_ALPHA_FUNC_FUNC 0x0000033c +#define NV20TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV20TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV20TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV20TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV20TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV20TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 +#define NV20TCL_ALPHA_FUNC_REF 0x00000340 +#define NV20TCL_BLEND_FUNC_SRC 0x00000344 +#define NV20TCL_BLEND_FUNC_SRC_ZERO 0x00000000 +#define NV20TCL_BLEND_FUNC_SRC_ONE 0x00000001 +#define NV20TCL_BLEND_FUNC_SRC_SRC_COLOR 0x00000300 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV20TCL_BLEND_FUNC_SRC_DST_ALPHA 0x00000304 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV20TCL_BLEND_FUNC_SRC_DST_COLOR 0x00000306 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307 +#define NV20TCL_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308 +#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV20TCL_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003 +#define NV20TCL_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV20TCL_BLEND_FUNC_DST 0x00000348 +#define NV20TCL_BLEND_FUNC_DST_ZERO 0x00000000 +#define 
NV20TCL_BLEND_FUNC_DST_ONE 0x00000001 +#define NV20TCL_BLEND_FUNC_DST_SRC_COLOR 0x00000300 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA 0x00000302 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV20TCL_BLEND_FUNC_DST_DST_ALPHA 0x00000304 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV20TCL_BLEND_FUNC_DST_DST_COLOR 0x00000306 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307 +#define NV20TCL_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308 +#define NV20TCL_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV20TCL_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003 +#define NV20TCL_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV20TCL_BLEND_COLOR 0x0000034c +#define NV20TCL_BLEND_COLOR_B_SHIFT 0 +#define NV20TCL_BLEND_COLOR_B_MASK 0x000000ff +#define NV20TCL_BLEND_COLOR_G_SHIFT 8 +#define NV20TCL_BLEND_COLOR_G_MASK 0x0000ff00 +#define NV20TCL_BLEND_COLOR_R_SHIFT 16 +#define NV20TCL_BLEND_COLOR_R_MASK 0x00ff0000 +#define NV20TCL_BLEND_COLOR_A_SHIFT 24 +#define NV20TCL_BLEND_COLOR_A_MASK 0xff000000 +#define NV20TCL_BLEND_EQUATION 0x00000350 +#define NV20TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV20TCL_BLEND_EQUATION_MIN 0x00008007 +#define NV20TCL_BLEND_EQUATION_MAX 0x00008008 +#define NV20TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV20TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV20TCL_DEPTH_FUNC 0x00000354 +#define NV20TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV20TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV20TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV20TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV20TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV20TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV20TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV20TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV20TCL_COLOR_MASK 0x00000358 +#define NV20TCL_COLOR_MASK_B (1 << 0) +#define NV20TCL_COLOR_MASK_G (1 << 8) +#define NV20TCL_COLOR_MASK_R (1 << 16) +#define NV20TCL_COLOR_MASK_A (1 << 24) +#define NV20TCL_DEPTH_WRITE_ENABLE 0x0000035c +#define NV20TCL_STENCIL_MASK 0x00000360 +#define NV20TCL_STENCIL_FUNC_FUNC 0x00000364 +#define NV20TCL_STENCIL_FUNC_FUNC_NEVER 0x00000200 +#define NV20TCL_STENCIL_FUNC_FUNC_LESS 0x00000201 +#define NV20TCL_STENCIL_FUNC_FUNC_EQUAL 0x00000202 +#define NV20TCL_STENCIL_FUNC_FUNC_LEQUAL 0x00000203 +#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_STENCIL_FUNC_FUNC_GREATER 0x00000204 +#define NV20TCL_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV20TCL_STENCIL_FUNC_FUNC_GEQUAL 0x00000206 +#define NV20TCL_STENCIL_FUNC_FUNC_ALWAYS 0x00000207 +#define NV20TCL_STENCIL_FUNC_REF 0x00000368 +#define NV20TCL_STENCIL_FUNC_MASK 0x0000036c +#define NV20TCL_STENCIL_OP_FAIL 0x00000370 +#define NV20TCL_STENCIL_OP_FAIL_ZERO 0x00000000 +#define NV20TCL_STENCIL_OP_FAIL_INVERT 0x0000150a +#define NV20TCL_STENCIL_OP_FAIL_KEEP 0x00001e00 +#define NV20TCL_STENCIL_OP_FAIL_REPLACE 0x00001e01 +#define NV20TCL_STENCIL_OP_FAIL_INCR 0x00001e02 +#define NV20TCL_STENCIL_OP_FAIL_DECR 0x00001e03 +#define NV20TCL_STENCIL_OP_FAIL_INCR_WRAP 0x00008507 +#define NV20TCL_STENCIL_OP_FAIL_DECR_WRAP 0x00008508 +#define NV20TCL_STENCIL_OP_ZFAIL 0x00000374 +#define NV20TCL_STENCIL_OP_ZFAIL_ZERO 0x00000000 +#define NV20TCL_STENCIL_OP_ZFAIL_INVERT 0x0000150a +#define NV20TCL_STENCIL_OP_ZFAIL_KEEP 0x00001e00 +#define NV20TCL_STENCIL_OP_ZFAIL_REPLACE 
0x00001e01 +#define NV20TCL_STENCIL_OP_ZFAIL_INCR 0x00001e02 +#define NV20TCL_STENCIL_OP_ZFAIL_DECR 0x00001e03 +#define NV20TCL_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV20TCL_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV20TCL_STENCIL_OP_ZPASS 0x00000378 +#define NV20TCL_STENCIL_OP_ZPASS_ZERO 0x00000000 +#define NV20TCL_STENCIL_OP_ZPASS_INVERT 0x0000150a +#define NV20TCL_STENCIL_OP_ZPASS_KEEP 0x00001e00 +#define NV20TCL_STENCIL_OP_ZPASS_REPLACE 0x00001e01 +#define NV20TCL_STENCIL_OP_ZPASS_INCR 0x00001e02 +#define NV20TCL_STENCIL_OP_ZPASS_DECR 0x00001e03 +#define NV20TCL_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV20TCL_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV20TCL_SHADE_MODEL 0x0000037c +#define NV20TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV20TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV20TCL_LINE_WIDTH 0x00000380 +#define NV20TCL_POLYGON_OFFSET_FACTOR 0x00000384 +#define NV20TCL_POLYGON_OFFSET_UNITS 0x00000388 +#define NV20TCL_POLYGON_MODE_FRONT 0x0000038c +#define NV20TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV20TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV20TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV20TCL_POLYGON_MODE_BACK 0x00000390 +#define NV20TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV20TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV20TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV20TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV20TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV20TCL_CULL_FACE 0x0000039c +#define NV20TCL_CULL_FACE_FRONT 0x00000404 +#define NV20TCL_CULL_FACE_BACK 0x00000405 +#define NV20TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV20TCL_FRONT_FACE 0x000003a0 +#define NV20TCL_FRONT_FACE_CW 0x00000900 +#define NV20TCL_FRONT_FACE_CCW 0x00000901 +#define NV20TCL_NORMALIZE_ENABLE 0x000003a4 +#define NV20TCL_COLOR_MATERIAL_FRONT_R 0x000003a8 +#define NV20TCL_COLOR_MATERIAL_FRONT_G 0x000003ac +#define NV20TCL_COLOR_MATERIAL_FRONT_B 0x000003b0 +#define NV20TCL_COLOR_MATERIAL_FRONT_A 0x000003b4 +#define NV20TCL_SEPARATE_SPECULAR_ENABLE 0x000003b8 +#define NV20TCL_ENABLED_LIGHTS 0x000003bc +#define NV20TCL_TX_GEN_S(x) (0x000003c0+((x)*16)) +#define NV20TCL_TX_GEN_S__SIZE 0x00000004 +#define NV20TCL_TX_GEN_S_FALSE 0x00000000 +#define NV20TCL_TX_GEN_S_EYE_LINEAR 0x00002400 +#define NV20TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401 +#define NV20TCL_TX_GEN_S_SPHERE_MAP 0x00002402 +#define NV20TCL_TX_GEN_S_NORMAL_MAP 0x00008511 +#define NV20TCL_TX_GEN_S_REFLECTION_MAP 0x00008512 +#define NV20TCL_TX_GEN_T(x) (0x000003c4+((x)*16)) +#define NV20TCL_TX_GEN_T__SIZE 0x00000004 +#define NV20TCL_TX_GEN_T_FALSE 0x00000000 +#define NV20TCL_TX_GEN_T_EYE_LINEAR 0x00002400 +#define NV20TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401 +#define NV20TCL_TX_GEN_T_SPHERE_MAP 0x00002402 +#define NV20TCL_TX_GEN_T_NORMAL_MAP 0x00008511 +#define NV20TCL_TX_GEN_T_REFLECTION_MAP 0x00008512 +#define NV20TCL_TX_GEN_R(x) (0x000003c8+((x)*16)) +#define NV20TCL_TX_GEN_R__SIZE 0x00000004 +#define NV20TCL_TX_GEN_R_FALSE 0x00000000 +#define NV20TCL_TX_GEN_R_EYE_LINEAR 0x00002400 +#define NV20TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401 +#define NV20TCL_TX_GEN_R_SPHERE_MAP 0x00002402 +#define NV20TCL_TX_GEN_R_NORMAL_MAP 0x00008511 +#define NV20TCL_TX_GEN_R_REFLECTION_MAP 0x00008512 +#define NV20TCL_TX_GEN_Q(x) (0x000003cc+((x)*16)) +#define NV20TCL_TX_GEN_Q__SIZE 0x00000004 +#define NV20TCL_TX_GEN_Q_FALSE 0x00000000 +#define NV20TCL_TX_GEN_Q_EYE_LINEAR 0x00002400 +#define NV20TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401 +#define NV20TCL_TX_GEN_Q_SPHERE_MAP 0x00002402 +#define 
NV20TCL_TX_GEN_Q_NORMAL_MAP 0x00008511 +#define NV20TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512 +#define NV20TCL_TX_MATRIX_ENABLE(x) (0x00000420+((x)*4)) +#define NV20TCL_TX_MATRIX_ENABLE__SIZE 0x00000004 +#define NV20TCL_POINT_SIZE 0x0000043c +#define NV20TCL_MODELVIEW0_MATRIX(x) (0x00000480+((x)*4)) +#define NV20TCL_MODELVIEW0_MATRIX__SIZE 0x00000010 +#define NV20TCL_MODELVIEW1_MATRIX(x) (0x000004c0+((x)*4)) +#define NV20TCL_MODELVIEW1_MATRIX__SIZE 0x00000010 +#define NV20TCL_MODELVIEW2_MATRIX(x) (0x00000500+((x)*4)) +#define NV20TCL_MODELVIEW2_MATRIX__SIZE 0x00000010 +#define NV20TCL_MODELVIEW3_MATRIX(x) (0x00000540+((x)*4)) +#define NV20TCL_MODELVIEW3_MATRIX__SIZE 0x00000010 +#define NV20TCL_INVERSE_MODELVIEW0_MATRIX(x) (0x00000580+((x)*4)) +#define NV20TCL_INVERSE_MODELVIEW0_MATRIX__SIZE 0x00000010 +#define NV20TCL_INVERSE_MODELVIEW1_MATRIX(x) (0x000005c0+((x)*4)) +#define NV20TCL_INVERSE_MODELVIEW1_MATRIX__SIZE 0x00000010 +#define NV20TCL_INVERSE_MODELVIEW2_MATRIX(x) (0x00000600+((x)*4)) +#define NV20TCL_INVERSE_MODELVIEW2_MATRIX__SIZE 0x00000010 +#define NV20TCL_INVERSE_MODELVIEW3_MATRIX(x) (0x00000640+((x)*4)) +#define NV20TCL_INVERSE_MODELVIEW3_MATRIX__SIZE 0x00000010 +#define NV20TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4)) +#define NV20TCL_PROJECTION_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX0_MATRIX(x) (0x000006c0+((x)*4)) +#define NV20TCL_TX0_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX1_MATRIX(x) (0x00000700+((x)*4)) +#define NV20TCL_TX1_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX2_MATRIX(x) (0x00000740+((x)*4)) +#define NV20TCL_TX2_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX3_MATRIX(x) (0x00000780+((x)*4)) +#define NV20TCL_TX3_MATRIX__SIZE 0x00000010 +#define NV20TCL_TX0_CLIP_PLANE_A(x) (0x00000840+((x)*16)) +#define NV20TCL_TX0_CLIP_PLANE_A__SIZE 0x00000004 +#define NV20TCL_TX0_CLIP_PLANE_B(x) (0x00000844+((x)*16)) +#define NV20TCL_TX0_CLIP_PLANE_B__SIZE 0x00000004 +#define NV20TCL_TX0_CLIP_PLANE_C(x) (0x00000848+((x)*16)) +#define NV20TCL_TX0_CLIP_PLANE_C__SIZE 0x00000004 +#define NV20TCL_TX0_CLIP_PLANE_D(x) (0x0000084c+((x)*16)) +#define NV20TCL_TX0_CLIP_PLANE_D__SIZE 0x00000004 +#define NV20TCL_TX1_CLIP_PLANE_A(x) (0x00000880+((x)*16)) +#define NV20TCL_TX1_CLIP_PLANE_A__SIZE 0x00000004 +#define NV20TCL_TX1_CLIP_PLANE_B(x) (0x00000884+((x)*16)) +#define NV20TCL_TX1_CLIP_PLANE_B__SIZE 0x00000004 +#define NV20TCL_TX1_CLIP_PLANE_C(x) (0x00000888+((x)*16)) +#define NV20TCL_TX1_CLIP_PLANE_C__SIZE 0x00000004 +#define NV20TCL_TX1_CLIP_PLANE_D(x) (0x0000088c+((x)*16)) +#define NV20TCL_TX1_CLIP_PLANE_D__SIZE 0x00000004 +#define NV20TCL_TX2_CLIP_PLANE_A(x) (0x000008c0+((x)*16)) +#define NV20TCL_TX2_CLIP_PLANE_A__SIZE 0x00000004 +#define NV20TCL_TX2_CLIP_PLANE_B(x) (0x000008c4+((x)*16)) +#define NV20TCL_TX2_CLIP_PLANE_B__SIZE 0x00000004 +#define NV20TCL_TX2_CLIP_PLANE_C(x) (0x000008c8+((x)*16)) +#define NV20TCL_TX2_CLIP_PLANE_C__SIZE 0x00000004 +#define NV20TCL_TX2_CLIP_PLANE_D(x) (0x000008cc+((x)*16)) +#define NV20TCL_TX2_CLIP_PLANE_D__SIZE 0x00000004 +#define NV20TCL_TX3_CLIP_PLANE_A(x) (0x00000900+((x)*16)) +#define NV20TCL_TX3_CLIP_PLANE_A__SIZE 0x00000004 +#define NV20TCL_TX3_CLIP_PLANE_B(x) (0x00000904+((x)*16)) +#define NV20TCL_TX3_CLIP_PLANE_B__SIZE 0x00000004 +#define NV20TCL_TX3_CLIP_PLANE_C(x) (0x00000908+((x)*16)) +#define NV20TCL_TX3_CLIP_PLANE_C__SIZE 0x00000004 +#define NV20TCL_TX3_CLIP_PLANE_D(x) (0x0000090c+((x)*16)) +#define NV20TCL_TX3_CLIP_PLANE_D__SIZE 0x00000004 +#define NV20TCL_FOG_EQUATION_CONSTANT 0x000009c0 +#define NV20TCL_FOG_EQUATION_LINEAR 0x000009c4 
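(Illustrative aside, not part of the diff.) The indexed macros above — NV20TCL_MODELVIEW*_MATRIX, NV20TCL_PROJECTION_MATRIX, the NV20TCL_TX*_MATRIX sets — each name one 32-bit method per matrix element, with __SIZE 0x10 (16 floats, a 4x4 matrix). A minimal sketch of how such a macro is typically expanded when uploading a matrix; the emit_method callback is a hypothetical stand-in for whatever push-buffer helper the driver really uses, and only the register macro comes from this header:

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical stand-in for the driver's real push-buffer helper. */
    typedef void (*emit_method_fn)(uint32_t method, uint32_t value);

    static void
    nv20_upload_projection(const float m[16], emit_method_fn emit)
    {
       /* NV20TCL_PROJECTION_MATRIX(i) expands to 0x0680 + i*4; the matrix is
        * written as 16 consecutive 32-bit float bit patterns
        * (i < NV20TCL_PROJECTION_MATRIX__SIZE). */
       for (uint32_t i = 0; i < 16; i++) {
          uint32_t bits;
          memcpy(&bits, &m[i], sizeof(bits));
          emit(NV20TCL_PROJECTION_MATRIX(i), bits);
       }
    }
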
+#define NV20TCL_FOG_EQUATION_QUADRATIC 0x000009c8 +#define NV20TCL_FRONT_MATERIAL_SHININESS(x) (0x000009e0+((x)*4)) +#define NV20TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10 +#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14 +#define NV20TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18 +#define NV20TCL_VIEWPORT_SCALE0_X 0x00000a20 +#define NV20TCL_VIEWPORT_SCALE0_Y 0x00000a24 +#define NV20TCL_VIEWPORT_SCALE0_Z 0x00000a28 +#define NV20TCL_VIEWPORT_SCALE0_W 0x00000a2c +#define NV20TCL_POINT_PARAMETER(x) (0x00000a30+((x)*4)) +#define NV20TCL_POINT_PARAMETER__SIZE 0x00000008 +#define NV20TCL_RC_CONSTANT_COLOR0(x) (0x00000a60+((x)*4)) +#define NV20TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008 +#define NV20TCL_RC_CONSTANT_COLOR0_B_SHIFT 0 +#define NV20TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff +#define NV20TCL_RC_CONSTANT_COLOR0_G_SHIFT 8 +#define NV20TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00 +#define NV20TCL_RC_CONSTANT_COLOR0_R_SHIFT 16 +#define NV20TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000 +#define NV20TCL_RC_CONSTANT_COLOR0_A_SHIFT 24 +#define NV20TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000 +#define NV20TCL_RC_CONSTANT_COLOR1(x) (0x00000a80+((x)*4)) +#define NV20TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008 +#define NV20TCL_RC_CONSTANT_COLOR1_B_SHIFT 0 +#define NV20TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff +#define NV20TCL_RC_CONSTANT_COLOR1_G_SHIFT 8 +#define NV20TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00 +#define NV20TCL_RC_CONSTANT_COLOR1_R_SHIFT 16 +#define NV20TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000 +#define NV20TCL_RC_CONSTANT_COLOR1_A_SHIFT 24 +#define NV20TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000 +#define NV20TCL_RC_OUT_ALPHA(x) (0x00000aa0+((x)*4)) +#define NV20TCL_RC_OUT_ALPHA__SIZE 0x00000008 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV20TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 
0x000000e0 +#define NV20TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV20TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV20TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12) +#define NV20TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13) +#define NV20TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14) +#define NV20TCL_RC_OUT_ALPHA_BIAS (1 << 15) +#define NV20TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000 +#define NV20TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV20TCL_RC_OUT_ALPHA_SCALE_SHIFT 17 +#define NV20TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000 +#define NV20TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000 +#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV20TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV20TCL_RC_IN_RGB(x) (0x00000ac0+((x)*4)) +#define NV20TCL_RC_IN_RGB__SIZE 0x00000008 +#define NV20TCL_RC_IN_RGB_D_INPUT_SHIFT 0 +#define NV20TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f +#define NV20TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV20TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV20TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003 +#define NV20TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV20TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV20TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV20TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c +#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d +#define NV20TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV20TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4) +#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV20TCL_RC_IN_RGB_D_MAPPING_SHIFT 5 +#define NV20TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0 +#define NV20TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV20TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV20TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV20TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV20TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV20TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV20TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV20TCL_RC_IN_RGB_C_INPUT_SHIFT 8 +#define NV20TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00 +#define NV20TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000 +#define 
NV20TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV20TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV20TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300 +#define NV20TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV20TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV20TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV20TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00 +#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00 +#define NV20TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV20TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12) +#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_SHIFT 13 +#define NV20TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV20TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV20TCL_RC_IN_RGB_B_INPUT_SHIFT 16 +#define NV20TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000 +#define NV20TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV20TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV20TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000 +#define NV20TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV20TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV20TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV20TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000 +#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000 +#define NV20TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV20TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20) +#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_SHIFT 21 +#define NV20TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV20TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV20TCL_RC_IN_RGB_A_INPUT_SHIFT 24 +#define NV20TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 
0x05000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV20TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28) +#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV20TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_SHIFT 29 +#define NV20TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV20TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV20TCL_VIEWPORT_SCALE1_X 0x00000af0 +#define NV20TCL_VIEWPORT_SCALE1_Y 0x00000af4 +#define NV20TCL_VIEWPORT_SCALE1_Z 0x00000af8 +#define NV20TCL_VIEWPORT_SCALE1_W 0x00000afc +#define NV20TCL_VP_UPLOAD_INST(x) (0x00000b00+((x)*4)) +#define NV20TCL_VP_UPLOAD_INST__SIZE 0x00000004 +#define NV20TCL_VP_UPLOAD_CONST(x) (0x00000b80+((x)*4)) +#define NV20TCL_VP_UPLOAD_CONST__SIZE 0x00000004 +#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_R(x) (0x00000c00+((x)*64)) +#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 +#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_G(x) (0x00000c04+((x)*64)) +#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 +#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_B(x) (0x00000c08+((x)*64)) +#define NV20TCL_LIGHT_BACK_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*128)) +#define NV20TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 +#define NV20TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*128)) +#define NV20TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 +#define NV20TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*128)) +#define 
NV20TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 +#define NV20TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*128)) +#define NV20TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 +#define NV20TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*128)) +#define NV20TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 +#define NV20TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*128)) +#define NV20TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 +#define NV20TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*128)) +#define NV20TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 +#define NV20TCL_LIGHT_POSITION_X(x) (0x0000105c+((x)*128)) +#define NV20TCL_LIGHT_POSITION_X__SIZE 0x00000008 +#define NV20TCL_LIGHT_POSITION_Y(x) (0x00001060+((x)*128)) +#define NV20TCL_LIGHT_POSITION_Y__SIZE 0x00000008 +#define NV20TCL_LIGHT_POSITION_Z(x) (0x00001064+((x)*128)) +#define NV20TCL_LIGHT_POSITION_Z__SIZE 0x00000008 +#define NV20TCL_LIGHT_CONSTANT_ATTENUATION(x) (0x00001068+((x)*128)) +#define NV20TCL_LIGHT_CONSTANT_ATTENUATION__SIZE 0x00000008 +#define NV20TCL_LIGHT_LINEAR_ATTENUATION(x) (0x0000106c+((x)*128)) +#define NV20TCL_LIGHT_LINEAR_ATTENUATION__SIZE 0x00000008 +#define NV20TCL_LIGHT_QUADRATIC_ATTENUATION(x) (0x00001070+((x)*128)) +#define NV20TCL_LIGHT_QUADRATIC_ATTENUATION__SIZE 0x00000008 +#define NV20TCL_POLYGON_STIPPLE_ENABLE 0x0000147c +#define NV20TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) +#define NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV20TCL_VERTEX_POS_3F_X 0x00001500 +#define NV20TCL_VERTEX_POS_3F_Y 0x00001504 +#define NV20TCL_VERTEX_POS_3F_Z 0x00001508 +#define NV20TCL_VERTEX_POS_4F_X 0x00001518 +#define NV20TCL_VERTEX_POS_4F_Y 0x0000151c +#define NV20TCL_VERTEX_POS_4F_Z 0x00001520 +#define NV20TCL_VERTEX_POS_3I_XY 0x00001528 +#define NV20TCL_VERTEX_POS_3I_XY_X_SHIFT 0 +#define NV20TCL_VERTEX_POS_3I_XY_X_MASK 0x0000ffff +#define NV20TCL_VERTEX_POS_3I_XY_Y_SHIFT 16 +#define NV20TCL_VERTEX_POS_3I_XY_Y_MASK 0xffff0000 +#define NV20TCL_VERTEX_POS_3I_Z 0x0000152c +#define NV20TCL_VERTEX_POS_3I_Z_Z_SHIFT 0 +#define NV20TCL_VERTEX_POS_3I_Z_Z_MASK 0x0000ffff +#define NV20TCL_VERTEX_NOR_3F_X 0x00001530 +#define NV20TCL_VERTEX_NOR_3F_Y 0x00001534 +#define NV20TCL_VERTEX_NOR_3F_Z 0x00001538 +#define NV20TCL_VERTEX_NOR_3I_XY 0x00001540 +#define NV20TCL_VERTEX_NOR_3I_XY_X_SHIFT 0 +#define NV20TCL_VERTEX_NOR_3I_XY_X_MASK 0x0000ffff +#define NV20TCL_VERTEX_NOR_3I_XY_Y_SHIFT 16 +#define NV20TCL_VERTEX_NOR_3I_XY_Y_MASK 0xffff0000 +#define NV20TCL_VERTEX_NOR_3I_Z 0x00001544 +#define NV20TCL_VERTEX_NOR_3I_Z_Z_SHIFT 0 +#define NV20TCL_VERTEX_NOR_3I_Z_Z_MASK 0x0000ffff +#define NV20TCL_VERTEX_COL_4F_X 0x00001550 +#define NV20TCL_VERTEX_COL_4F_Y 0x00001554 +#define NV20TCL_VERTEX_COL_4F_Z 0x00001558 +#define NV20TCL_VERTEX_COL_4F_W 0x0000155c +#define NV20TCL_VERTEX_COL_3F_X 0x00001560 +#define NV20TCL_VERTEX_COL_3F_Y 0x00001564 +#define NV20TCL_VERTEX_COL_3F_Z 0x00001568 +#define NV20TCL_VERTEX_COL_4I 0x0000156c +#define NV20TCL_VERTEX_COL_4I_R_SHIFT 0 +#define NV20TCL_VERTEX_COL_4I_R_MASK 0x000000ff +#define NV20TCL_VERTEX_COL_4I_G_SHIFT 8 +#define NV20TCL_VERTEX_COL_4I_G_MASK 0x0000ff00 +#define NV20TCL_VERTEX_COL_4I_B_SHIFT 16 +#define NV20TCL_VERTEX_COL_4I_B_MASK 0x00ff0000 +#define NV20TCL_VERTEX_COL_4I_A_SHIFT 24 +#define NV20TCL_VERTEX_COL_4I_A_MASK 0xff000000 +#define NV20TCL_VERTEX_COL2_3F_X 0x00001580 +#define NV20TCL_VERTEX_COL2_3F_Y 0x00001584 +#define NV20TCL_VERTEX_COL2_3F_Z 0x00001588 +#define NV20TCL_VERTEX_COL2_4I 0x0000158c +#define NV20TCL_VERTEX_COL2_4I_R_SHIFT 0 +#define NV20TCL_VERTEX_COL2_4I_R_MASK 0x000000ff +#define 
NV20TCL_VERTEX_COL2_4I_G_SHIFT 8 +#define NV20TCL_VERTEX_COL2_4I_G_MASK 0x0000ff00 +#define NV20TCL_VERTEX_COL2_4I_B_SHIFT 16 +#define NV20TCL_VERTEX_COL2_4I_B_MASK 0x00ff0000 +#define NV20TCL_VERTEX_COL2_4I_A_SHIFT 24 +#define NV20TCL_VERTEX_COL2_4I_A_MASK 0xff000000 +#define NV20TCL_VERTEX_TX0_2F_S 0x00001590 +#define NV20TCL_VERTEX_TX0_2F_T 0x00001594 +#define NV20TCL_VERTEX_TX0_2I 0x00001598 +#define NV20TCL_VERTEX_TX0_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX0_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX0_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX0_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX0_4F_S 0x000015a0 +#define NV20TCL_VERTEX_TX0_4F_T 0x000015a4 +#define NV20TCL_VERTEX_TX0_4F_R 0x000015a8 +#define NV20TCL_VERTEX_TX0_4F_Q 0x000015ac +#define NV20TCL_VERTEX_TX0_4I_ST 0x000015b0 +#define NV20TCL_VERTEX_TX0_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX0_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX0_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX0_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX0_4I_RQ 0x000015b4 +#define NV20TCL_VERTEX_TX0_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX0_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX0_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX0_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX1_2F_S 0x000015b8 +#define NV20TCL_VERTEX_TX1_2F_T 0x000015bc +#define NV20TCL_VERTEX_TX1_2I 0x000015c0 +#define NV20TCL_VERTEX_TX1_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX1_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX1_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX1_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX1_4F_S 0x000015c8 +#define NV20TCL_VERTEX_TX1_4F_T 0x000015cc +#define NV20TCL_VERTEX_TX1_4F_R 0x000015d0 +#define NV20TCL_VERTEX_TX1_4F_Q 0x000015d4 +#define NV20TCL_VERTEX_TX1_4I_ST 0x000015d8 +#define NV20TCL_VERTEX_TX1_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX1_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX1_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX1_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX1_4I_RQ 0x000015dc +#define NV20TCL_VERTEX_TX1_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX1_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX1_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX1_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX2_2F_S 0x000015e0 +#define NV20TCL_VERTEX_TX2_2F_T 0x000015e4 +#define NV20TCL_VERTEX_TX2_2I 0x000015e8 +#define NV20TCL_VERTEX_TX2_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX2_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX2_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX2_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX2_4F_S 0x000015f0 +#define NV20TCL_VERTEX_TX2_4F_T 0x000015f4 +#define NV20TCL_VERTEX_TX2_4F_R 0x000015f8 +#define NV20TCL_VERTEX_TX2_4F_Q 0x000015fc +#define NV20TCL_VERTEX_TX2_4I_ST 0x00001600 +#define NV20TCL_VERTEX_TX2_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX2_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX2_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX2_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX2_4I_RQ 0x00001604 +#define NV20TCL_VERTEX_TX2_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX2_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX2_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX2_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX3_2F_S 0x00001608 +#define NV20TCL_VERTEX_TX3_2F_T 0x0000160c +#define NV20TCL_VERTEX_TX3_2I 0x00001610 +#define NV20TCL_VERTEX_TX3_2I_S_SHIFT 0 +#define NV20TCL_VERTEX_TX3_2I_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX3_2I_T_SHIFT 16 +#define NV20TCL_VERTEX_TX3_2I_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX3_4F_S 0x00001620 +#define NV20TCL_VERTEX_TX3_4F_T 
0x00001624 +#define NV20TCL_VERTEX_TX3_4F_R 0x00001628 +#define NV20TCL_VERTEX_TX3_4F_Q 0x0000162c +#define NV20TCL_VERTEX_TX3_4I_ST 0x00001630 +#define NV20TCL_VERTEX_TX3_4I_ST_S_SHIFT 0 +#define NV20TCL_VERTEX_TX3_4I_ST_S_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX3_4I_ST_T_SHIFT 16 +#define NV20TCL_VERTEX_TX3_4I_ST_T_MASK 0xffff0000 +#define NV20TCL_VERTEX_TX3_4I_RQ 0x00001634 +#define NV20TCL_VERTEX_TX3_4I_RQ_R_SHIFT 0 +#define NV20TCL_VERTEX_TX3_4I_RQ_R_MASK 0x0000ffff +#define NV20TCL_VERTEX_TX3_4I_RQ_Q_SHIFT 16 +#define NV20TCL_VERTEX_TX3_4I_RQ_Q_MASK 0xffff0000 +#define NV20TCL_VERTEX_FOG_1F 0x00001698 +#define NV20TCL_EDGEFLAG_ENABLE 0x000016bc +#define NV20TCL_VTXBUF_ADDRESS(x) (0x00001720+((x)*4)) +#define NV20TCL_VTXBUF_ADDRESS__SIZE 0x00000010 +#define NV20TCL_VTXBUF_ADDRESS_DMA1 (1 << 31) +#define NV20TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0 +#define NV20TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff +#define NV20TCL_VTXFMT(x) (0x00001760+((x)*4)) +#define NV20TCL_VTXFMT__SIZE 0x00000010 +#define NV20TCL_VTXFMT_TYPE_SHIFT 0 +#define NV20TCL_VTXFMT_TYPE_MASK 0x0000000f +#define NV20TCL_VTXFMT_TYPE_FLOAT 0x00000002 +#define NV20TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define NV20TCL_VTXFMT_TYPE_USHORT 0x00000005 +#define NV20TCL_VTXFMT_SIZE_SHIFT 4 +#define NV20TCL_VTXFMT_SIZE_MASK 0x000000f0 +#define NV20TCL_VTXFMT_STRIDE_SHIFT 8 +#define NV20TCL_VTXFMT_STRIDE_MASK 0x0000ff00 +#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 +#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 +#define NV20TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 +#define NV20TCL_COLOR_MATERIAL_BACK_A 0x000017ac +#define NV20TCL_COLOR_MATERIAL_BACK_R 0x000017b0 +#define NV20TCL_COLOR_MATERIAL_BACK_G 0x000017b4 +#define NV20TCL_COLOR_MATERIAL_BACK_B 0x000017b8 +#define NV20TCL_COLOR_LOGIC_OP_ENABLE 0x000017bc +#define NV20TCL_COLOR_LOGIC_OP_OP 0x000017c0 +#define NV20TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV20TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV20TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV20TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV20TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV20TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV20TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV20TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV20TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV20TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV20TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV20TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV20TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV20TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV20TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV20TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f +#define NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE 0x000017c4 +#define NV20TCL_TX_SHADER_CULL_MODE 0x000017f8 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S (1 << 0) +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_S_LESS 0x00000001 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T (1 << 1) +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_T_LESS 0x00000002 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R (1 << 2) +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_R_LESS 0x00000004 +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q (1 << 3) +#define NV20TCL_TX_SHADER_CULL_MODE_TX0_Q_GEQUAL 0x00000000 +#define 
NV20TCL_TX_SHADER_CULL_MODE_TX0_Q_LESS 0x00000008 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S (1 << 4) +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_S_LESS 0x00000010 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T (1 << 5) +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_T_LESS 0x00000020 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R (1 << 6) +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_R_LESS 0x00000040 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q (1 << 7) +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX1_Q_LESS 0x00000080 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S (1 << 8) +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_S_LESS 0x00000100 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T (1 << 9) +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_T_LESS 0x00000200 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R (1 << 10) +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_R_LESS 0x00000400 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q (1 << 11) +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX2_Q_LESS 0x00000800 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S (1 << 12) +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_S_LESS 0x00001000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T (1 << 13) +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_T_LESS 0x00002000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R (1 << 14) +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_R_LESS 0x00004000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q (1 << 15) +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q_GEQUAL 0x00000000 +#define NV20TCL_TX_SHADER_CULL_MODE_TX3_Q_LESS 0x00008000 +#define NV20TCL_VERTEX_BEGIN_END 0x000017fc +#define NV20TCL_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV20TCL_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV20TCL_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV20TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define NV20TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV20TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV20TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV20TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV20TCL_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV20TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV20TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a +#define NV20TCL_VB_ELEMENT_U16 0x00001800 +#define NV20TCL_VB_ELEMENT_U16_I0_SHIFT 0 +#define NV20TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff +#define NV20TCL_VB_ELEMENT_U16_I1_SHIFT 16 +#define NV20TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 +#define NV20TCL_VB_VERTEX_BATCH 0x00001810 +#define NV20TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0 +#define NV20TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff +#define NV20TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 +#define NV20TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 +#define NV20TCL_VERTEX_DATA 0x00001818 +#define NV20TCL_TX_SHADER_CONST_EYE_X 0x0000181c +#define NV20TCL_TX_SHADER_CONST_EYE_Y 0x00001820 +#define NV20TCL_TX_SHADER_CONST_EYE_Z 0x00001824 +#define NV20TCL_VTX_ATTR_4F_X(x) (0x00001a00+((x)*16)) +#define NV20TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define 
NV20TCL_VTX_ATTR_4F_Y(x) (0x00001a04+((x)*16)) +#define NV20TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV20TCL_VTX_ATTR_4F_Z(x) (0x00001a08+((x)*16)) +#define NV20TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV20TCL_VTX_ATTR_4F_W(x) (0x00001a0c+((x)*16)) +#define NV20TCL_VTX_ATTR_4F_W__SIZE 0x00000010 +#define NV20TCL_TX_OFFSET(x) (0x00001b00+((x)*64)) +#define NV20TCL_TX_OFFSET__SIZE 0x00000004 +#define NV20TCL_TX_FORMAT(x) (0x00001b04+((x)*64)) +#define NV20TCL_TX_FORMAT__SIZE 0x00000004 +#define NV20TCL_TX_FORMAT_DMA0 (1 << 0) +#define NV20TCL_TX_FORMAT_DMA1 (1 << 1) +#define NV20TCL_TX_FORMAT_CUBIC (1 << 2) +#define NV20TCL_TX_FORMAT_NO_BORDER (1 << 3) +#define NV20TCL_TX_FORMAT_DIMS_SHIFT 4 +#define NV20TCL_TX_FORMAT_DIMS_MASK 0x000000f0 +#define NV20TCL_TX_FORMAT_DIMS_1D 0x00000010 +#define NV20TCL_TX_FORMAT_DIMS_2D 0x00000020 +#define NV20TCL_TX_FORMAT_DIMS_3D 0x00000030 +#define NV20TCL_TX_FORMAT_FORMAT_SHIFT 8 +#define NV20TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00 +#define NV20TCL_TX_FORMAT_FORMAT_L8 0x00000000 +#define NV20TCL_TX_FORMAT_FORMAT_A8 0x00000100 +#define NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV20TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300 +#define NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400 +#define NV20TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500 +#define NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600 +#define NV20TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700 +#define NV20TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00 +#define NV20TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00 +#define NV20TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00 +#define NV20TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00 +#define NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 +#define NV20TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100 +#define NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 +#define NV20TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300 +#define NV20TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00 +#define NV20TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00 +#define NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 +#define NV20TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 +#define NV20TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000 +#define NV20TCL_TX_FORMAT_FORMAT_DSDT 0x00002800 +#define NV20TCL_TX_FORMAT_FORMAT_A16 0x00003200 +#define NV20TCL_TX_FORMAT_FORMAT_HILO16 0x00003300 +#define NV20TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500 +#define NV20TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600 +#define NV20TCL_TX_FORMAT_FORMAT_HILO8 0x00004400 +#define NV20TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500 +#define NV20TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600 +#define NV20TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700 +#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 +#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 +#define NV20TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 +#define NV20TCL_TX_FORMAT_MIPMAP (1 << 19) +#define NV20TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20 +#define NV20TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000 +#define NV20TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV20TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000 +#define NV20TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28 +#define NV20TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000 +#define NV20TCL_TX_WRAP(x) (0x00001b08+((x)*64)) +#define NV20TCL_TX_WRAP__SIZE 0x00000004 +#define NV20TCL_TX_WRAP_S_SHIFT 0 +#define NV20TCL_TX_WRAP_S_MASK 0x000000ff +#define NV20TCL_TX_WRAP_S_REPEAT 0x00000001 +#define NV20TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV20TCL_TX_WRAP_S_CLAMP 0x00000005 
+#define NV20TCL_TX_WRAP_T_SHIFT 8 +#define NV20TCL_TX_WRAP_T_MASK 0x00000f00 +#define NV20TCL_TX_WRAP_T_REPEAT 0x00000100 +#define NV20TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV20TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV20TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV20TCL_TX_WRAP_T_CLAMP 0x00000500 +#define NV20TCL_TX_WRAP_R_SHIFT 16 +#define NV20TCL_TX_WRAP_R_MASK 0x000f0000 +#define NV20TCL_TX_WRAP_R_REPEAT 0x00010000 +#define NV20TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV20TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV20TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV20TCL_TX_WRAP_R_CLAMP 0x00050000 +#define NV20TCL_TX_ENABLE(x) (0x00001b0c+((x)*64)) +#define NV20TCL_TX_ENABLE__SIZE 0x00000004 +#define NV20TCL_TX_ENABLE_ANISO_SHIFT 4 +#define NV20TCL_TX_ENABLE_ANISO_MASK 0x00000030 +#define NV20TCL_TX_ENABLE_ANISO_NONE 0x00000000 +#define NV20TCL_TX_ENABLE_ANISO_2X 0x00000010 +#define NV20TCL_TX_ENABLE_ANISO_4X 0x00000020 +#define NV20TCL_TX_ENABLE_ANISO_8X 0x00000030 +#define NV20TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14 +#define NV20TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000 +#define NV20TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26 +#define NV20TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000 +#define NV20TCL_TX_ENABLE_ENABLE (1 << 30) +#define NV20TCL_TX_SWIZZLE(x) (0x00001b10+((x)*64)) +#define NV20TCL_TX_SWIZZLE__SIZE 0x00000004 +#define NV20TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16 +#define NV20TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000 +#define NV20TCL_TX_FILTER(x) (0x00001b14+((x)*64)) +#define NV20TCL_TX_FILTER__SIZE 0x00000004 +#define NV20TCL_TX_FILTER_LOD_BIAS_SHIFT 8 +#define NV20TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00 +#define NV20TCL_TX_FILTER_MINIFY_SHIFT 16 +#define NV20TCL_TX_FILTER_MINIFY_MASK 0x000f0000 +#define NV20TCL_TX_FILTER_MINIFY_NEAREST 0x00010000 +#define NV20TCL_TX_FILTER_MINIFY_LINEAR 0x00020000 +#define NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV20TCL_TX_FILTER_MAGNIFY_SHIFT 24 +#define NV20TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000 +#define NV20TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000 +#define NV20TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000 +#define NV20TCL_TX_NPOT_SIZE(x) (0x00001b1c+((x)*64)) +#define NV20TCL_TX_NPOT_SIZE__SIZE 0x00000004 +#define NV20TCL_TX_NPOT_SIZE_H_SHIFT 0 +#define NV20TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff +#define NV20TCL_TX_NPOT_SIZE_W_SHIFT 16 +#define NV20TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 +#define NV20TCL_TX_PALETTE_OFFSET(x) (0x00001b20+((x)*64)) +#define NV20TCL_TX_PALETTE_OFFSET__SIZE 0x00000004 +#define NV20TCL_TX_BORDER_COLOR(x) (0x00001b24+((x)*64)) +#define NV20TCL_TX_BORDER_COLOR__SIZE 0x00000004 +#define NV20TCL_TX_BORDER_COLOR_B_SHIFT 0 +#define NV20TCL_TX_BORDER_COLOR_B_MASK 0x000000ff +#define NV20TCL_TX_BORDER_COLOR_G_SHIFT 8 +#define NV20TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00 +#define NV20TCL_TX_BORDER_COLOR_R_SHIFT 16 +#define NV20TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000 +#define NV20TCL_TX_BORDER_COLOR_A_SHIFT 24 +#define NV20TCL_TX_BORDER_COLOR_A_MASK 0xff000000 +#define NV20TCL_TX_SHADER_OFFSET_MATRIX00(x) (0x00001b28+((x)*64)) +#define NV20TCL_TX_SHADER_OFFSET_MATRIX00__SIZE 0x00000004 +#define NV20TCL_TX_SHADER_OFFSET_MATRIX01(x) (0x00001b2c+((x)*64)) +#define NV20TCL_TX_SHADER_OFFSET_MATRIX01__SIZE 0x00000004 +#define NV20TCL_TX_SHADER_OFFSET_MATRIX11(x) (0x00001b30+((x)*64)) 
+#define NV20TCL_TX_SHADER_OFFSET_MATRIX11__SIZE 0x00000004 +#define NV20TCL_TX_SHADER_OFFSET_MATRIX10(x) (0x00001b34+((x)*64)) +#define NV20TCL_TX_SHADER_OFFSET_MATRIX10__SIZE 0x00000004 +#define NV20TCL_DEPTH_UNK17D8 0x00001d78 +#define NV20TCL_DEPTH_UNK17D8_CLAMP_SHIFT 4 +#define NV20TCL_DEPTH_UNK17D8_CLAMP_MASK 0x000000f0 +#define NV20TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV20TCL_CLEAR_DEPTH_VALUE 0x00001d8c +#define NV20TCL_CLEAR_VALUE 0x00001d90 +#define NV20TCL_CLEAR_BUFFERS 0x00001d94 +#define NV20TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) +#define NV20TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) +#define NV20TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) +#define NV20TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) +#define NV20TCL_CLEAR_BUFFERS_STENCIL (1 << 1) +#define NV20TCL_CLEAR_BUFFERS_DEPTH (1 << 0) +#define NV20TCL_RC_COLOR0 0x00001e20 +#define NV20TCL_RC_COLOR0_B_SHIFT 0 +#define NV20TCL_RC_COLOR0_B_MASK 0x000000ff +#define NV20TCL_RC_COLOR0_G_SHIFT 8 +#define NV20TCL_RC_COLOR0_G_MASK 0x0000ff00 +#define NV20TCL_RC_COLOR0_R_SHIFT 16 +#define NV20TCL_RC_COLOR0_R_MASK 0x00ff0000 +#define NV20TCL_RC_COLOR0_A_SHIFT 24 +#define NV20TCL_RC_COLOR0_A_MASK 0xff000000 +#define NV20TCL_RC_COLOR1 0x00001e24 +#define NV20TCL_RC_COLOR1_B_SHIFT 0 +#define NV20TCL_RC_COLOR1_B_MASK 0x000000ff +#define NV20TCL_RC_COLOR1_G_SHIFT 8 +#define NV20TCL_RC_COLOR1_G_MASK 0x0000ff00 +#define NV20TCL_RC_COLOR1_R_SHIFT 16 +#define NV20TCL_RC_COLOR1_R_MASK 0x00ff0000 +#define NV20TCL_RC_COLOR1_A_SHIFT 24 +#define NV20TCL_RC_COLOR1_A_MASK 0xff000000 +#define NV20TCL_BACK_MATERIAL_SHININESS(x) (0x00001e28+((x)*4)) +#define NV20TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV20TCL_RC_OUT_RGB(x) (0x00001e40+((x)*4)) +#define NV20TCL_RC_OUT_RGB__SIZE 0x00000008 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV20TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV20TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8 +#define 
NV20TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV20TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV20TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12) +#define NV20TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13) +#define NV20TCL_RC_OUT_RGB_MUX_SUM (1 << 14) +#define NV20TCL_RC_OUT_RGB_BIAS (1 << 15) +#define NV20TCL_RC_OUT_RGB_BIAS_NONE 0x00000000 +#define NV20TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV20TCL_RC_OUT_RGB_SCALE_SHIFT 17 +#define NV20TCL_RC_OUT_RGB_SCALE_MASK 0x00000000 +#define NV20TCL_RC_OUT_RGB_SCALE_NONE 0x00000000 +#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV20TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV20TCL_RC_ENABLE 0x00001e60 +#define NV20TCL_RC_ENABLE_NUM_COMBINERS_SHIFT 0 +#define NV20TCL_RC_ENABLE_NUM_COMBINERS_MASK 0x0000000f +#define NV20TCL_TX_RCOMP 0x00001e6c +#define NV20TCL_TX_RCOMP_NEVER 0x00000000 +#define NV20TCL_TX_RCOMP_GREATER 0x00000001 +#define NV20TCL_TX_RCOMP_EQUAL 0x00000002 +#define NV20TCL_TX_RCOMP_GEQUAL 0x00000003 +#define NV20TCL_TX_RCOMP_LESS 0x00000004 +#define NV20TCL_TX_RCOMP_NOTEQUAL 0x00000005 +#define NV20TCL_TX_RCOMP_LEQUAL 0x00000006 +#define NV20TCL_TX_RCOMP_ALWAYS 0x00000007 +#define NV20TCL_TX_SHADER_OP 0x00001e70 +#define NV20TCL_TX_SHADER_OP_TX0_SHIFT 0 +#define NV20TCL_TX_SHADER_OP_TX0_MASK 0x0000001f +#define NV20TCL_TX_SHADER_OP_TX0_NONE 0x00000000 +#define NV20TCL_TX_SHADER_OP_TX0_TEXTURE_2D 0x00000001 +#define NV20TCL_TX_SHADER_OP_TX0_PASS_THROUGH 0x00000004 +#define NV20TCL_TX_SHADER_OP_TX0_CULL_FRAGMENT 0x00000005 +#define NV20TCL_TX_SHADER_OP_TX0_OFFSET_TEXTURE_2D 0x00000006 +#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT_TEXTURE_2D 0x00000009 +#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT_DEPTH_REPLACE 0x0000000a +#define NV20TCL_TX_SHADER_OP_TX0_DEPENDANT_AR_TEXTURE_2D 0x0000000f +#define NV20TCL_TX_SHADER_OP_TX0_DEPENDANT_GB_TEXTURE_2D 0x00000010 +#define NV20TCL_TX_SHADER_OP_TX0_DOT_PRODUCT 0x00000011 +#define NV20TCL_TX_SHADER_OP_TX1_SHIFT 5 +#define NV20TCL_TX_SHADER_OP_TX1_MASK 0x000003e0 +#define NV20TCL_TX_SHADER_OP_TX1_NONE 0x00000000 +#define NV20TCL_TX_SHADER_OP_TX1_TEXTURE_2D 0x00000020 +#define NV20TCL_TX_SHADER_OP_TX1_PASS_THROUGH 0x00000080 +#define NV20TCL_TX_SHADER_OP_TX1_CULL_FRAGMENT 0x000000a0 +#define NV20TCL_TX_SHADER_OP_TX1_OFFSET_TEXTURE_2D 0x000000c0 +#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT_TEXTURE_2D 0x00000120 +#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT_DEPTH_REPLACE 0x00000140 +#define NV20TCL_TX_SHADER_OP_TX1_DEPENDANT_AR_TEXTURE_2D 0x000001e0 +#define NV20TCL_TX_SHADER_OP_TX1_DEPENDANT_GB_TEXTURE_2D 0x00000200 +#define NV20TCL_TX_SHADER_OP_TX1_DOT_PRODUCT 0x00000220 +#define NV20TCL_TX_SHADER_OP_TX2_SHIFT 10 +#define NV20TCL_TX_SHADER_OP_TX2_MASK 
0x00007c00 +#define NV20TCL_TX_SHADER_OP_TX2_NONE 0x00000000 +#define NV20TCL_TX_SHADER_OP_TX2_TEXTURE_2D 0x00000400 +#define NV20TCL_TX_SHADER_OP_TX2_PASS_THROUGH 0x00001000 +#define NV20TCL_TX_SHADER_OP_TX2_CULL_FRAGMENT 0x00001400 +#define NV20TCL_TX_SHADER_OP_TX2_OFFSET_TEXTURE_2D 0x00001800 +#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT_TEXTURE_2D 0x00002400 +#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT_DEPTH_REPLACE 0x00002800 +#define NV20TCL_TX_SHADER_OP_TX2_DEPENDANT_AR_TEXTURE_2D 0x00003c00 +#define NV20TCL_TX_SHADER_OP_TX2_DEPENDANT_GB_TEXTURE_2D 0x00004000 +#define NV20TCL_TX_SHADER_OP_TX2_DOT_PRODUCT 0x00004400 +#define NV20TCL_TX_SHADER_OP_TX3_SHIFT 15 +#define NV20TCL_TX_SHADER_OP_TX3_MASK 0x000f8000 +#define NV20TCL_TX_SHADER_OP_TX3_NONE 0x00000000 +#define NV20TCL_TX_SHADER_OP_TX3_TEXTURE_2D 0x00008000 +#define NV20TCL_TX_SHADER_OP_TX3_PASS_THROUGH 0x00020000 +#define NV20TCL_TX_SHADER_OP_TX3_CULL_FRAGMENT 0x00028000 +#define NV20TCL_TX_SHADER_OP_TX3_OFFSET_TEXTURE_2D 0x00030000 +#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT_TEXTURE_2D 0x00048000 +#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT_DEPTH_REPLACE 0x00050000 +#define NV20TCL_TX_SHADER_OP_TX3_DEPENDANT_AR_TEXTURE_2D 0x00078000 +#define NV20TCL_TX_SHADER_OP_TX3_DEPENDANT_GB_TEXTURE_2D 0x00080000 +#define NV20TCL_TX_SHADER_OP_TX3_DOT_PRODUCT 0x00088000 +#define NV20TCL_TX_SHADER_DOTMAPPING 0x00001e74 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX0_SHIFT 0 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX0_MASK 0x0000000f +#define NV20TCL_TX_SHADER_DOTMAPPING_TX1_SHIFT 4 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX1_MASK 0x000000f0 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX2_SHIFT 8 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX2_MASK 0x00000f00 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX3_SHIFT 12 +#define NV20TCL_TX_SHADER_DOTMAPPING_TX3_MASK 0x0000f000 +#define NV20TCL_TX_SHADER_PREVIOUS 0x00001e78 +#define NV20TCL_TX_SHADER_PREVIOUS_TX0_SHIFT 8 +#define NV20TCL_TX_SHADER_PREVIOUS_TX0_MASK 0x00000f00 +#define NV20TCL_TX_SHADER_PREVIOUS_TX1_SHIFT 12 +#define NV20TCL_TX_SHADER_PREVIOUS_TX1_MASK 0x0000f000 +#define NV20TCL_TX_SHADER_PREVIOUS_TX2_SHIFT 16 +#define NV20TCL_TX_SHADER_PREVIOUS_TX2_MASK 0x00030000 +#define NV20TCL_TX_SHADER_PREVIOUS_TX3_SHIFT 20 +#define NV20TCL_TX_SHADER_PREVIOUS_TX3_MASK 0x00300000 +#define NV20TCL_ENGINE 0x00001e94 +#define NV20TCL_ENGINE_VP (1 << 1) +#define NV20TCL_ENGINE_FIXED (1 << 2) +#define NV20TCL_VP_UPLOAD_FROM_ID 0x00001e9c +#define NV20TCL_VP_START_FROM_ID 0x00001ea0 +#define NV20TCL_VP_UPLOAD_CONST_ID 0x00001ea4 +#define NV20TCL_VIEWPORT_TRANSLATE_X 0x00001f00 +#define NV20TCL_VIEWPORT_TRANSLATE_Y 0x00001f04 +#define NV20TCL_VIEWPORT_TRANSLATE_Z 0x00001f08 +#define NV20TCL_VIEWPORT_TRANSLATE_W 0x00001f0c + + +#define NV17TCL 0x00000099 + +#define NV17TCL_DMA_IN_MEMORY4 0x000001ac +#define NV17TCL_DMA_IN_MEMORY5 0x000001b0 +#define NV17TCL_COLOR_MASK_ENABLE 0x000002bc +#define NV17TCL_LMA_DEPTH_BUFFER_PITCH 0x00000d5c +#define NV17TCL_LMA_DEPTH_BUFFER_OFFSET 0x00000d60 +#define NV17TCL_LMA_DEPTH_FILL_VALUE 0x00000d68 +#define NV17TCL_LMA_DEPTH_BUFFER_CLEAR 0x00000d6c +#define NV17TCL_LMA_DEPTH_ENABLE 0x00001658 + + +#define NV20_SWIZZLED_SURFACE 0x0000009e + + + +#define NV12_IMAGE_BLIT 0x0000009f + + + +#define NV30_CONTEXT_SURFACES_2D 0x00000362 + + + +#define NV30_STRETCHED_IMAGE_FROM_CPU 0x00000366 + + + +#define NV30_TEXTURE_FROM_CPU 0x0000037b + + + +#define NV30_SCALED_IMAGE_FROM_MEMORY 0x00000389 + + + +#define NV30_IMAGE_FROM_CPU 0x0000038a + + + +#define NV30TCL 0x00000397 + + + +#define 
NV30_SWIZZLED_SURFACE 0x0000039e + + + +#define NV35TCL 0x00000497 + + + +#define NV25TCL 0x00000597 + +#define NV25TCL_DMA_IN_MEMORY4 0x0000019c +#define NV25TCL_DMA_IN_MEMORY5 0x000001a0 +#define NV25TCL_DMA_IN_MEMORY8 0x000001ac +#define NV25TCL_DMA_IN_MEMORY9 0x000001b0 + + +#define NV34TCL 0x00000697 + +#define NV34TCL_NOP 0x00000100 +#define NV34TCL_NOTIFY 0x00000104 +#define NV34TCL_DMA_NOTIFY 0x00000180 +#define NV34TCL_DMA_TEXTURE0 0x00000184 +#define NV34TCL_DMA_TEXTURE1 0x00000188 +#define NV34TCL_DMA_COLOR1 0x0000018c +#define NV34TCL_DMA_COLOR0 0x00000194 +#define NV34TCL_DMA_ZETA 0x00000198 +#define NV34TCL_DMA_VTXBUF0 0x0000019c +#define NV34TCL_DMA_VTXBUF1 0x000001a0 +#define NV34TCL_DMA_FENCE 0x000001a4 +#define NV34TCL_DMA_QUERY 0x000001a8 +#define NV34TCL_DMA_IN_MEMORY7 0x000001ac +#define NV34TCL_DMA_IN_MEMORY8 0x000001b0 +#define NV34TCL_RT_HORIZ 0x00000200 +#define NV34TCL_RT_HORIZ_X_SHIFT 0 +#define NV34TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV34TCL_RT_HORIZ_W_SHIFT 16 +#define NV34TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV34TCL_RT_VERT 0x00000204 +#define NV34TCL_RT_VERT_Y_SHIFT 0 +#define NV34TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV34TCL_RT_VERT_H_SHIFT 16 +#define NV34TCL_RT_VERT_H_MASK 0xffff0000 +#define NV34TCL_RT_FORMAT 0x00000208 +#define NV34TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV34TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV34TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV34TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV34TCL_RT_FORMAT_ZETA_SHIFT 5 +#define NV34TCL_RT_FORMAT_ZETA_MASK 0x000000e0 +#define NV34TCL_RT_FORMAT_ZETA_Z16 0x00000020 +#define NV34TCL_RT_FORMAT_ZETA_Z24S8 0x00000040 +#define NV34TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV34TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV34TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV34TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV34TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV34TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV34TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d +#define NV34TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV34TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV34TCL_COLOR0_PITCH 0x0000020c +#define NV34TCL_COLOR0_PITCH_COLOR0_SHIFT 0 +#define NV34TCL_COLOR0_PITCH_COLOR0_MASK 0x0000ffff +#define NV34TCL_COLOR0_PITCH_ZETA_SHIFT 16 +#define NV34TCL_COLOR0_PITCH_ZETA_MASK 0xffff0000 +#define NV34TCL_COLOR0_OFFSET 0x00000210 +#define NV34TCL_ZETA_OFFSET 0x00000214 +#define NV34TCL_COLOR1_OFFSET 0x00000218 +#define NV34TCL_COLOR1_PITCH 0x0000021c +#define NV34TCL_RT_ENABLE 0x00000220 +#define NV34TCL_RT_ENABLE_MRT (1 << 4) +#define NV34TCL_RT_ENABLE_COLOR1 (1 << 1) +#define NV34TCL_RT_ENABLE_COLOR0 (1 << 0) +#define NV34TCL_LMA_DEPTH_PITCH 0x0000022c +#define NV34TCL_LMA_DEPTH_OFFSET 0x00000230 +#define NV34TCL_TX_UNITS_ENABLE 0x0000023c +#define NV34TCL_TX_UNITS_ENABLE_TX0 (1 << 0) +#define NV34TCL_TX_UNITS_ENABLE_TX1 (1 << 1) +#define NV34TCL_TX_UNITS_ENABLE_TX2 (1 << 2) +#define NV34TCL_TX_UNITS_ENABLE_TX3 (1 << 3) +#define NV34TCL_TX_UNITS_ENABLE_TX4 (1 << 4) +#define NV34TCL_TX_UNITS_ENABLE_TX5 (1 << 5) +#define NV34TCL_TX_UNITS_ENABLE_TX6 (1 << 6) +#define NV34TCL_TX_UNITS_ENABLE_TX7 (1 << 7) +#define NV34TCL_TX_MATRIX_ENABLE(x) (0x00000240+((x)*4)) +#define NV34TCL_TX_MATRIX_ENABLE__SIZE 0x00000008 +#define NV34TCL_VIEWPORT_TX_ORIGIN 0x000002b8 +#define NV34TCL_VIEWPORT_TX_ORIGIN_X_SHIFT 0 +#define NV34TCL_VIEWPORT_TX_ORIGIN_X_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_SHIFT 16 +#define NV34TCL_VIEWPORT_TX_ORIGIN_Y_MASK 0xffff0000 +#define 
NV34TCL_VIEWPORT_CLIP_MODE 0x000002bc +#define NV34TCL_VIEWPORT_CLIP_HORIZ(x) (0x000002c0+((x)*8)) +#define NV34TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_SHIFT 0 +#define NV34TCL_VIEWPORT_CLIP_HORIZ_L_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_SHIFT 16 +#define NV34TCL_VIEWPORT_CLIP_HORIZ_R_MASK 0xffff0000 +#define NV34TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8)) +#define NV34TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV34TCL_VIEWPORT_CLIP_VERT_T_SHIFT 0 +#define NV34TCL_VIEWPORT_CLIP_VERT_T_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_CLIP_VERT_D_SHIFT 16 +#define NV34TCL_VIEWPORT_CLIP_VERT_D_MASK 0xffff0000 +#define NV34TCL_DITHER_ENABLE 0x00000300 +#define NV34TCL_ALPHA_FUNC_ENABLE 0x00000304 +#define NV34TCL_ALPHA_FUNC_FUNC 0x00000308 +#define NV34TCL_ALPHA_FUNC_FUNC_NEVER 0x00000200 +#define NV34TCL_ALPHA_FUNC_FUNC_LESS 0x00000201 +#define NV34TCL_ALPHA_FUNC_FUNC_EQUAL 0x00000202 +#define NV34TCL_ALPHA_FUNC_FUNC_LEQUAL 0x00000203 +#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_ALPHA_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_ALPHA_FUNC_FUNC_GEQUAL 0x00000206 +#define NV34TCL_ALPHA_FUNC_FUNC_ALWAYS 0x00000207 +#define NV34TCL_ALPHA_FUNC_REF 0x0000030c +#define NV34TCL_BLEND_FUNC_ENABLE 0x00000310 +#define NV34TCL_BLEND_FUNC_SRC 0x00000314 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SHIFT 0 +#define NV34TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff +#define NV34TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV34TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV34TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV34TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV34TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV34TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV34TCL_BLEND_FUNC_DST 0x00000318 +#define 
NV34TCL_BLEND_FUNC_DST_RGB_SHIFT 0 +#define NV34TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff +#define NV34TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV34TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV34TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV34TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV34TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV34TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV34TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV34TCL_BLEND_COLOR 0x0000031c +#define NV34TCL_BLEND_COLOR_B_SHIFT 0 +#define NV34TCL_BLEND_COLOR_B_MASK 0x000000ff +#define NV34TCL_BLEND_COLOR_G_SHIFT 8 +#define NV34TCL_BLEND_COLOR_G_MASK 0x0000ff00 +#define NV34TCL_BLEND_COLOR_R_SHIFT 16 +#define NV34TCL_BLEND_COLOR_R_MASK 0x00ff0000 +#define NV34TCL_BLEND_COLOR_A_SHIFT 24 +#define NV34TCL_BLEND_COLOR_A_MASK 0xff000000 +#define NV34TCL_BLEND_EQUATION 0x00000320 +#define NV34TCL_BLEND_EQUATION_FUNC_ADD 0x00008006 +#define NV34TCL_BLEND_EQUATION_MIN 0x00008007 +#define NV34TCL_BLEND_EQUATION_MAX 0x00008008 +#define NV34TCL_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a +#define NV34TCL_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV34TCL_COLOR_MASK 0x00000324 +#define NV34TCL_COLOR_MASK_B_SHIFT 0 +#define NV34TCL_COLOR_MASK_B_MASK 0x000000ff +#define NV34TCL_COLOR_MASK_G_SHIFT 8 +#define NV34TCL_COLOR_MASK_G_MASK 0x0000ff00 +#define NV34TCL_COLOR_MASK_R_SHIFT 16 +#define NV34TCL_COLOR_MASK_R_MASK 0x00ff0000 +#define NV34TCL_COLOR_MASK_A_SHIFT 24 +#define NV34TCL_COLOR_MASK_A_MASK 0xff000000 +#define NV34TCL_STENCIL_BACK_ENABLE 0x00000328 +#define NV34TCL_STENCIL_BACK_MASK 0x0000032c +#define NV34TCL_STENCIL_BACK_FUNC_FUNC 0x00000330 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define 
NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV34TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 +#define NV34TCL_STENCIL_BACK_FUNC_REF 0x00000334 +#define NV34TCL_STENCIL_BACK_FUNC_MASK 0x00000338 +#define NV34TCL_STENCIL_BACK_OP_FAIL 0x0000033c +#define NV34TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL 0x00000340 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_BACK_OP_ZPASS 0x00000344 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV34TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_FRONT_ENABLE 0x00000348 +#define NV34TCL_STENCIL_FRONT_MASK 0x0000034c +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC 0x00000350 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV34TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 +#define NV34TCL_STENCIL_FRONT_FUNC_REF 0x00000354 +#define NV34TCL_STENCIL_FRONT_FUNC_MASK 0x00000358 +#define NV34TCL_STENCIL_FRONT_OP_FAIL 0x0000035c +#define NV34TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL 0x00000360 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define 
NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS 0x00000364 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV34TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV34TCL_SHADE_MODEL 0x00000368 +#define NV34TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV34TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV34TCL_FOG_ENABLE 0x0000036c +#define NV34TCL_FOG_COLOR 0x00000370 +#define NV34TCL_FOG_COLOR_R_SHIFT 0 +#define NV34TCL_FOG_COLOR_R_MASK 0x000000ff +#define NV34TCL_FOG_COLOR_G_SHIFT 8 +#define NV34TCL_FOG_COLOR_G_MASK 0x0000ff00 +#define NV34TCL_FOG_COLOR_B_SHIFT 16 +#define NV34TCL_FOG_COLOR_B_MASK 0x00ff0000 +#define NV34TCL_FOG_COLOR_A_SHIFT 24 +#define NV34TCL_FOG_COLOR_A_MASK 0xff000000 +#define NV34TCL_COLOR_LOGIC_OP_ENABLE 0x00000374 +#define NV34TCL_COLOR_LOGIC_OP_OP 0x00000378 +#define NV34TCL_COLOR_LOGIC_OP_OP_CLEAR 0x00001500 +#define NV34TCL_COLOR_LOGIC_OP_OP_AND 0x00001501 +#define NV34TCL_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502 +#define NV34TCL_COLOR_LOGIC_OP_OP_COPY 0x00001503 +#define NV34TCL_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504 +#define NV34TCL_COLOR_LOGIC_OP_OP_NOOP 0x00001505 +#define NV34TCL_COLOR_LOGIC_OP_OP_XOR 0x00001506 +#define NV34TCL_COLOR_LOGIC_OP_OP_OR 0x00001507 +#define NV34TCL_COLOR_LOGIC_OP_OP_NOR 0x00001508 +#define NV34TCL_COLOR_LOGIC_OP_OP_EQUIV 0x00001509 +#define NV34TCL_COLOR_LOGIC_OP_OP_INVERT 0x0000150a +#define NV34TCL_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b +#define NV34TCL_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c +#define NV34TCL_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d +#define NV34TCL_COLOR_LOGIC_OP_OP_NAND 0x0000150e +#define NV34TCL_COLOR_LOGIC_OP_OP_SET 0x0000150f +#define NV34TCL_NORMALIZE_ENABLE 0x0000037c +#define NV34TCL_COLOR_MATERIAL 0x00000390 +#define NV34TCL_COLOR_MATERIAL_FRONT_EMISSION_ENABLE (1 << 0) +#define NV34TCL_COLOR_MATERIAL_FRONT_AMBIENT_ENABLE (1 << 2) +#define NV34TCL_COLOR_MATERIAL_FRONT_DIFFUSE_ENABLE (1 << 4) +#define NV34TCL_COLOR_MATERIAL_FRONT_SPECULAR_ENABLE (1 << 6) +#define NV34TCL_COLOR_MATERIAL_BACK_EMISSION_ENABLE (1 << 8) +#define NV34TCL_COLOR_MATERIAL_BACK_AMBIENT_ENABLE (1 << 10) +#define NV34TCL_COLOR_MATERIAL_BACK_DIFFUSE_ENABLE (1 << 12) +#define NV34TCL_COLOR_MATERIAL_BACK_SPECULAR_ENABLE (1 << 14) +#define NV34TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV34TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV34TCL_COLOR_MATERIAL_FRONT_R 0x000003a0 +#define NV34TCL_COLOR_MATERIAL_FRONT_G 0x000003a4 +#define NV34TCL_COLOR_MATERIAL_FRONT_B 0x000003a8 +#define NV34TCL_COLOR_MATERIAL_FRONT_A 0x000003b4 +#define NV34TCL_LINE_WIDTH 0x000003b8 +#define NV34TCL_LINE_SMOOTH_ENABLE 0x000003bc +#define NV34TCL_TX_GEN_S(x) (0x00000400+((x)*16)) +#define NV34TCL_TX_GEN_S__SIZE 0x00000008 +#define NV34TCL_TX_GEN_S_FALSE 0x00000000 +#define NV34TCL_TX_GEN_S_EYE_LINEAR 0x00002400 +#define NV34TCL_TX_GEN_S_OBJECT_LINEAR 0x00002401 +#define NV34TCL_TX_GEN_S_SPHERE_MAP 0x00002402 +#define NV34TCL_TX_GEN_S_NORMAL_MAP 0x00008511 +#define NV34TCL_TX_GEN_S_REFLECTION_MAP 0x00008512 +#define NV34TCL_TX_GEN_T(x) 
(0x00000404+((x)*16)) +#define NV34TCL_TX_GEN_T__SIZE 0x00000008 +#define NV34TCL_TX_GEN_T_FALSE 0x00000000 +#define NV34TCL_TX_GEN_T_EYE_LINEAR 0x00002400 +#define NV34TCL_TX_GEN_T_OBJECT_LINEAR 0x00002401 +#define NV34TCL_TX_GEN_T_SPHERE_MAP 0x00002402 +#define NV34TCL_TX_GEN_T_NORMAL_MAP 0x00008511 +#define NV34TCL_TX_GEN_T_REFLECTION_MAP 0x00008512 +#define NV34TCL_TX_GEN_R(x) (0x00000408+((x)*16)) +#define NV34TCL_TX_GEN_R__SIZE 0x00000008 +#define NV34TCL_TX_GEN_R_FALSE 0x00000000 +#define NV34TCL_TX_GEN_R_EYE_LINEAR 0x00002400 +#define NV34TCL_TX_GEN_R_OBJECT_LINEAR 0x00002401 +#define NV34TCL_TX_GEN_R_SPHERE_MAP 0x00002402 +#define NV34TCL_TX_GEN_R_NORMAL_MAP 0x00008511 +#define NV34TCL_TX_GEN_R_REFLECTION_MAP 0x00008512 +#define NV34TCL_TX_GEN_Q(x) (0x0000040c+((x)*16)) +#define NV34TCL_TX_GEN_Q__SIZE 0x00000008 +#define NV34TCL_TX_GEN_Q_FALSE 0x00000000 +#define NV34TCL_TX_GEN_Q_EYE_LINEAR 0x00002400 +#define NV34TCL_TX_GEN_Q_OBJECT_LINEAR 0x00002401 +#define NV34TCL_TX_GEN_Q_SPHERE_MAP 0x00002402 +#define NV34TCL_TX_GEN_Q_NORMAL_MAP 0x00008511 +#define NV34TCL_TX_GEN_Q_REFLECTION_MAP 0x00008512 +#define NV34TCL_MODELVIEW_MATRIX(x) (0x00000480+((x)*4)) +#define NV34TCL_MODELVIEW_MATRIX__SIZE 0x00000010 +#define NV34TCL_INVERSE_MODELVIEW_MATRIX(x) (0x00000580+((x)*4)) +#define NV34TCL_INVERSE_MODELVIEW_MATRIX__SIZE 0x0000000c +#define NV34TCL_PROJECTION_MATRIX(x) (0x00000680+((x)*4)) +#define NV34TCL_PROJECTION_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX0_MATRIX(x) (0x000006c0+((x)*4)) +#define NV34TCL_TX0_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX1_MATRIX(x) (0x00000700+((x)*4)) +#define NV34TCL_TX1_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX2_MATRIX(x) (0x00000740+((x)*4)) +#define NV34TCL_TX2_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX3_MATRIX(x) (0x00000780+((x)*4)) +#define NV34TCL_TX3_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX4_MATRIX(x) (0x000007c0+((x)*4)) +#define NV34TCL_TX4_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX5_MATRIX(x) (0x00000800+((x)*4)) +#define NV34TCL_TX5_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX6_MATRIX(x) (0x00000840+((x)*4)) +#define NV34TCL_TX6_MATRIX__SIZE 0x00000010 +#define NV34TCL_TX7_MATRIX(x) (0x00000880+((x)*4)) +#define NV34TCL_TX7_MATRIX__SIZE 0x00000010 +#define NV34TCL_SCISSOR_HORIZ 0x000008c0 +#define NV34TCL_SCISSOR_HORIZ_X_SHIFT 0 +#define NV34TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff +#define NV34TCL_SCISSOR_HORIZ_W_SHIFT 16 +#define NV34TCL_SCISSOR_HORIZ_W_MASK 0xffff0000 +#define NV34TCL_SCISSOR_VERT 0x000008c4 +#define NV34TCL_SCISSOR_VERT_Y_SHIFT 0 +#define NV34TCL_SCISSOR_VERT_Y_MASK 0x0000ffff +#define NV34TCL_SCISSOR_VERT_H_SHIFT 16 +#define NV34TCL_SCISSOR_VERT_H_MASK 0xffff0000 +#define NV34TCL_FOG_COORD_DIST 0x000008c8 +#define NV34TCL_FOG_COORD_DIST_COORD_FALSE 0x00000000 +#define NV34TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_RADIAL_NV 0x00000001 +#define NV34TCL_FOG_COORD_DIST_COORD_FRAGMENT_DEPTH_DISTANCE_EYE_PLANE_ABSOLUTE_NV 0x00000002 +#define NV34TCL_FOG_COORD_DIST_COORD_FOG 0x00000003 +#define NV34TCL_FOG_MODE 0x000008cc +#define NV34TCL_FOG_MODE_EXP 0x00000800 +#define NV34TCL_FOG_MODE_EXP_2 0x00000802 +#define NV34TCL_FOG_MODE_EXP2 0x00000803 +#define NV34TCL_FOG_MODE_LINEAR 0x00000804 +#define NV34TCL_FOG_MODE_LINEAR_2 0x00002601 +#define NV34TCL_FOG_EQUATION_CONSTANT 0x000008d0 +#define NV34TCL_FOG_EQUATION_LINEAR 0x000008d4 +#define NV34TCL_FOG_EQUATION_QUADRATIC 0x000008d8 +#define NV34TCL_FP_ACTIVE_PROGRAM 0x000008e4 +#define NV34TCL_FP_ACTIVE_PROGRAM_DMA0 (1 << 0) +#define NV34TCL_FP_ACTIVE_PROGRAM_DMA1 
(1 << 1) +#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_SHIFT 2 +#define NV34TCL_FP_ACTIVE_PROGRAM_OFFSET_MASK 0xfffffffc +#define NV34TCL_RC_COLOR0 0x000008ec +#define NV34TCL_RC_COLOR0_B_SHIFT 0 +#define NV34TCL_RC_COLOR0_B_MASK 0x000000ff +#define NV34TCL_RC_COLOR0_G_SHIFT 8 +#define NV34TCL_RC_COLOR0_G_MASK 0x0000ff00 +#define NV34TCL_RC_COLOR0_R_SHIFT 16 +#define NV34TCL_RC_COLOR0_R_MASK 0x00ff0000 +#define NV34TCL_RC_COLOR0_A_SHIFT 24 +#define NV34TCL_RC_COLOR0_A_MASK 0xff000000 +#define NV34TCL_RC_COLOR1 0x000008f0 +#define NV34TCL_RC_COLOR1_B_SHIFT 0 +#define NV34TCL_RC_COLOR1_B_MASK 0x000000ff +#define NV34TCL_RC_COLOR1_G_SHIFT 8 +#define NV34TCL_RC_COLOR1_G_MASK 0x0000ff00 +#define NV34TCL_RC_COLOR1_R_SHIFT 16 +#define NV34TCL_RC_COLOR1_R_MASK 0x00ff0000 +#define NV34TCL_RC_COLOR1_A_SHIFT 24 +#define NV34TCL_RC_COLOR1_A_MASK 0xff000000 +#define NV34TCL_RC_FINAL0 0x000008f4 +#define NV34TCL_RC_FINAL0_D_INPUT_SHIFT 0 +#define NV34TCL_RC_FINAL0_D_INPUT_MASK 0x0000000f +#define NV34TCL_RC_FINAL0_D_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV34TCL_RC_FINAL0_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV34TCL_RC_FINAL0_D_INPUT_FOG 0x00000003 +#define NV34TCL_RC_FINAL0_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV34TCL_RC_FINAL0_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV34TCL_RC_FINAL0_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV34TCL_RC_FINAL0_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV34TCL_RC_FINAL0_D_INPUT_SPARE0_NV 0x0000000c +#define NV34TCL_RC_FINAL0_D_INPUT_SPARE1_NV 0x0000000d +#define NV34TCL_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV34TCL_RC_FINAL0_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE (1 << 4) +#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV34TCL_RC_FINAL0_D_MAPPING_SHIFT 5 +#define NV34TCL_RC_FINAL0_D_MAPPING_MASK 0x000000e0 +#define NV34TCL_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV34TCL_RC_FINAL0_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV34TCL_RC_FINAL0_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV34TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV34TCL_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV34TCL_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV34TCL_RC_FINAL0_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV34TCL_RC_FINAL0_C_INPUT_SHIFT 8 +#define NV34TCL_RC_FINAL0_C_INPUT_MASK 0x00000f00 +#define NV34TCL_RC_FINAL0_C_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV34TCL_RC_FINAL0_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV34TCL_RC_FINAL0_C_INPUT_FOG 0x00000300 +#define NV34TCL_RC_FINAL0_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV34TCL_RC_FINAL0_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV34TCL_RC_FINAL0_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV34TCL_RC_FINAL0_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV34TCL_RC_FINAL0_C_INPUT_SPARE0_NV 0x00000c00 +#define NV34TCL_RC_FINAL0_C_INPUT_SPARE1_NV 0x00000d00 +#define NV34TCL_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV34TCL_RC_FINAL0_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE (1 << 12) +#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define 
NV34TCL_RC_FINAL0_C_MAPPING_SHIFT 13 +#define NV34TCL_RC_FINAL0_C_MAPPING_MASK 0x0000e000 +#define NV34TCL_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV34TCL_RC_FINAL0_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV34TCL_RC_FINAL0_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV34TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV34TCL_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV34TCL_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV34TCL_RC_FINAL0_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV34TCL_RC_FINAL0_B_INPUT_SHIFT 16 +#define NV34TCL_RC_FINAL0_B_INPUT_MASK 0x000f0000 +#define NV34TCL_RC_FINAL0_B_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV34TCL_RC_FINAL0_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV34TCL_RC_FINAL0_B_INPUT_FOG 0x00030000 +#define NV34TCL_RC_FINAL0_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV34TCL_RC_FINAL0_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV34TCL_RC_FINAL0_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV34TCL_RC_FINAL0_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV34TCL_RC_FINAL0_B_INPUT_SPARE0_NV 0x000c0000 +#define NV34TCL_RC_FINAL0_B_INPUT_SPARE1_NV 0x000d0000 +#define NV34TCL_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV34TCL_RC_FINAL0_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE (1 << 20) +#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV34TCL_RC_FINAL0_B_MAPPING_SHIFT 21 +#define NV34TCL_RC_FINAL0_B_MAPPING_MASK 0x00e00000 +#define NV34TCL_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV34TCL_RC_FINAL0_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV34TCL_RC_FINAL0_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV34TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV34TCL_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV34TCL_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV34TCL_RC_FINAL0_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV34TCL_RC_FINAL0_A_INPUT_SHIFT 24 +#define NV34TCL_RC_FINAL0_A_INPUT_MASK 0x0f000000 +#define NV34TCL_RC_FINAL0_A_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV34TCL_RC_FINAL0_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV34TCL_RC_FINAL0_A_INPUT_FOG 0x03000000 +#define NV34TCL_RC_FINAL0_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV34TCL_RC_FINAL0_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV34TCL_RC_FINAL0_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV34TCL_RC_FINAL0_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV34TCL_RC_FINAL0_A_INPUT_SPARE0_NV 0x0c000000 +#define NV34TCL_RC_FINAL0_A_INPUT_SPARE1_NV 0x0d000000 +#define NV34TCL_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV34TCL_RC_FINAL0_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE (1 << 28) +#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_SHIFT 29 +#define NV34TCL_RC_FINAL0_A_MAPPING_MASK 0xe0000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 
+#define NV34TCL_RC_FINAL0_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV34TCL_RC_FINAL0_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV34TCL_RC_FINAL1 0x000008f8 +#define NV34TCL_RC_FINAL1_COLOR_SUM_CLAMP (1 << 7) +#define NV34TCL_RC_FINAL1_G_INPUT_SHIFT 8 +#define NV34TCL_RC_FINAL1_G_INPUT_MASK 0x00000f00 +#define NV34TCL_RC_FINAL1_G_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV34TCL_RC_FINAL1_G_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV34TCL_RC_FINAL1_G_INPUT_FOG 0x00000300 +#define NV34TCL_RC_FINAL1_G_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV34TCL_RC_FINAL1_G_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV34TCL_RC_FINAL1_G_INPUT_TEXTURE0_ARB 0x00000800 +#define NV34TCL_RC_FINAL1_G_INPUT_TEXTURE1_ARB 0x00000900 +#define NV34TCL_RC_FINAL1_G_INPUT_SPARE0_NV 0x00000c00 +#define NV34TCL_RC_FINAL1_G_INPUT_SPARE1_NV 0x00000d00 +#define NV34TCL_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV34TCL_RC_FINAL1_G_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE (1 << 12) +#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV34TCL_RC_FINAL1_G_MAPPING_SHIFT 13 +#define NV34TCL_RC_FINAL1_G_MAPPING_MASK 0x0000e000 +#define NV34TCL_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV34TCL_RC_FINAL1_G_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV34TCL_RC_FINAL1_G_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV34TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV34TCL_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV34TCL_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV34TCL_RC_FINAL1_G_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV34TCL_RC_FINAL1_F_INPUT_SHIFT 16 +#define NV34TCL_RC_FINAL1_F_INPUT_MASK 0x000f0000 +#define NV34TCL_RC_FINAL1_F_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV34TCL_RC_FINAL1_F_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV34TCL_RC_FINAL1_F_INPUT_FOG 0x00030000 +#define NV34TCL_RC_FINAL1_F_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV34TCL_RC_FINAL1_F_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV34TCL_RC_FINAL1_F_INPUT_TEXTURE0_ARB 0x00080000 +#define NV34TCL_RC_FINAL1_F_INPUT_TEXTURE1_ARB 0x00090000 +#define NV34TCL_RC_FINAL1_F_INPUT_SPARE0_NV 0x000c0000 +#define NV34TCL_RC_FINAL1_F_INPUT_SPARE1_NV 0x000d0000 +#define NV34TCL_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV34TCL_RC_FINAL1_F_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE (1 << 20) +#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV34TCL_RC_FINAL1_F_MAPPING_SHIFT 21 +#define NV34TCL_RC_FINAL1_F_MAPPING_MASK 0x00e00000 +#define NV34TCL_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV34TCL_RC_FINAL1_F_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV34TCL_RC_FINAL1_F_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV34TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV34TCL_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 
+#define NV34TCL_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV34TCL_RC_FINAL1_F_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV34TCL_RC_FINAL1_E_INPUT_SHIFT 24 +#define NV34TCL_RC_FINAL1_E_INPUT_MASK 0x0f000000 +#define NV34TCL_RC_FINAL1_E_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV34TCL_RC_FINAL1_E_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV34TCL_RC_FINAL1_E_INPUT_FOG 0x03000000 +#define NV34TCL_RC_FINAL1_E_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV34TCL_RC_FINAL1_E_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV34TCL_RC_FINAL1_E_INPUT_TEXTURE0_ARB 0x08000000 +#define NV34TCL_RC_FINAL1_E_INPUT_TEXTURE1_ARB 0x09000000 +#define NV34TCL_RC_FINAL1_E_INPUT_SPARE0_NV 0x0c000000 +#define NV34TCL_RC_FINAL1_E_INPUT_SPARE1_NV 0x0d000000 +#define NV34TCL_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV34TCL_RC_FINAL1_E_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE (1 << 28) +#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_SHIFT 29 +#define NV34TCL_RC_FINAL1_E_MAPPING_MASK 0xe0000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV34TCL_RC_FINAL1_E_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV34TCL_RC_ENABLE 0x000008fc +#define NV34TCL_RC_ENABLE_NUM_COMBINERS_SHIFT 0 +#define NV34TCL_RC_ENABLE_NUM_COMBINERS_MASK 0x0000000f +#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR0_SHIFT 12 +#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR0_MASK 0x0000f000 +#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR1_SHIFT 16 +#define NV34TCL_RC_ENABLE_STAGE_CONSTANT_COLOR1_MASK 0x000f0000 +#define NV34TCL_RC_IN_ALPHA(x) (0x00000900+((x)*32)) +#define NV34TCL_RC_IN_ALPHA__SIZE 0x00000008 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_SHIFT 0 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_MASK 0x0000000f +#define NV34TCL_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_FOG 0x00000003 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE0_NV 0x0000000c +#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE1_NV 0x0000000d +#define NV34TCL_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV34TCL_RC_IN_ALPHA_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE (1 << 4) +#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000 +#define NV34TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SHIFT 5 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_MASK 0x000000e0 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define 
NV34TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY_NV 0x000000c0 +#define NV34TCL_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_SHIFT 8 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_MASK 0x00000f00 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_FOG 0x00000300 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE0_NV 0x00000c00 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE1_NV 0x00000d00 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV34TCL_RC_IN_ALPHA_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE (1 << 12) +#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000 +#define NV34TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SHIFT 13 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_MASK 0x0000e000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV34TCL_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_SHIFT 16 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_MASK 0x000f0000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_FOG 0x00030000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE0_NV 0x000c0000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE1_NV 0x000d0000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV34TCL_RC_IN_ALPHA_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE (1 << 20) +#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000 +#define NV34TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SHIFT 21 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_MASK 0x00e00000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL_NV 
0x00800000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV34TCL_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_SHIFT 24 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_MASK 0x0f000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_FOG 0x03000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE0_NV 0x0c000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE1_NV 0x0d000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV34TCL_RC_IN_ALPHA_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE (1 << 28) +#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000 +#define NV34TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SHIFT 29 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_MASK 0xe0000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV34TCL_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV34TCL_RC_IN_RGB(x) (0x00000904+((x)*32)) +#define NV34TCL_RC_IN_RGB__SIZE 0x00000008 +#define NV34TCL_RC_IN_RGB_D_INPUT_SHIFT 0 +#define NV34TCL_RC_IN_RGB_D_INPUT_MASK 0x0000000f +#define NV34TCL_RC_IN_RGB_D_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV34TCL_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV34TCL_RC_IN_RGB_D_INPUT_FOG 0x00000003 +#define NV34TCL_RC_IN_RGB_D_INPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV34TCL_RC_IN_RGB_D_INPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV34TCL_RC_IN_RGB_D_INPUT_TEXTURE0_ARB 0x00000008 +#define NV34TCL_RC_IN_RGB_D_INPUT_TEXTURE1_ARB 0x00000009 +#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE0_NV 0x0000000c +#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE1_NV 0x0000000d +#define NV34TCL_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV34TCL_RC_IN_RGB_D_INPUT_E_TIMES_F_NV 0x0000000f +#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE (1 << 4) +#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010 +#define NV34TCL_RC_IN_RGB_D_MAPPING_SHIFT 5 +#define NV34TCL_RC_IN_RGB_D_MAPPING_MASK 0x000000e0 +#define NV34TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV 0x00000020 +#define NV34TCL_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL_NV 0x00000040 +#define NV34TCL_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE_NV 0x00000060 +#define NV34TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL_NV 0x00000080 +#define NV34TCL_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE_NV 0x000000a0 +#define NV34TCL_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY_NV 
0x000000c0 +#define NV34TCL_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE_NV 0x000000e0 +#define NV34TCL_RC_IN_RGB_C_INPUT_SHIFT 8 +#define NV34TCL_RC_IN_RGB_C_INPUT_MASK 0x00000f00 +#define NV34TCL_RC_IN_RGB_C_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV34TCL_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV34TCL_RC_IN_RGB_C_INPUT_FOG 0x00000300 +#define NV34TCL_RC_IN_RGB_C_INPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV34TCL_RC_IN_RGB_C_INPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV34TCL_RC_IN_RGB_C_INPUT_TEXTURE0_ARB 0x00000800 +#define NV34TCL_RC_IN_RGB_C_INPUT_TEXTURE1_ARB 0x00000900 +#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE0_NV 0x00000c00 +#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE1_NV 0x00000d00 +#define NV34TCL_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV34TCL_RC_IN_RGB_C_INPUT_E_TIMES_F_NV 0x00000f00 +#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE (1 << 12) +#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_SHIFT 13 +#define NV34TCL_RC_IN_RGB_C_MAPPING_MASK 0x0000e000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV 0x00002000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL_NV 0x00004000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE_NV 0x00006000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL_NV 0x00008000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE_NV 0x0000a000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY_NV 0x0000c000 +#define NV34TCL_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE_NV 0x0000e000 +#define NV34TCL_RC_IN_RGB_B_INPUT_SHIFT 16 +#define NV34TCL_RC_IN_RGB_B_INPUT_MASK 0x000f0000 +#define NV34TCL_RC_IN_RGB_B_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0_NV 0x00010000 +#define NV34TCL_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1_NV 0x00020000 +#define NV34TCL_RC_IN_RGB_B_INPUT_FOG 0x00030000 +#define NV34TCL_RC_IN_RGB_B_INPUT_PRIMARY_COLOR_NV 0x00040000 +#define NV34TCL_RC_IN_RGB_B_INPUT_SECONDARY_COLOR_NV 0x00050000 +#define NV34TCL_RC_IN_RGB_B_INPUT_TEXTURE0_ARB 0x00080000 +#define NV34TCL_RC_IN_RGB_B_INPUT_TEXTURE1_ARB 0x00090000 +#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE0_NV 0x000c0000 +#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE1_NV 0x000d0000 +#define NV34TCL_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000e0000 +#define NV34TCL_RC_IN_RGB_B_INPUT_E_TIMES_F_NV 0x000f0000 +#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE (1 << 20) +#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_SHIFT 21 +#define NV34TCL_RC_IN_RGB_B_MAPPING_MASK 0x00e00000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV 0x00200000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL_NV 0x00400000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE_NV 0x00600000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL_NV 0x00800000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE_NV 0x00a00000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY_NV 0x00c00000 +#define NV34TCL_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE_NV 0x00e00000 +#define NV34TCL_RC_IN_RGB_A_INPUT_SHIFT 24 +#define NV34TCL_RC_IN_RGB_A_INPUT_MASK 0x0f000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_ZERO 0x00000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0_NV 0x01000000 +#define 
NV34TCL_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1_NV 0x02000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_FOG 0x03000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_PRIMARY_COLOR_NV 0x04000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_SECONDARY_COLOR_NV 0x05000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_TEXTURE0_ARB 0x08000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_TEXTURE1_ARB 0x09000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE0_NV 0x0c000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE1_NV 0x0d000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0e000000 +#define NV34TCL_RC_IN_RGB_A_INPUT_E_TIMES_F_NV 0x0f000000 +#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE (1 << 28) +#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000 +#define NV34TCL_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_SHIFT 29 +#define NV34TCL_RC_IN_RGB_A_MAPPING_MASK 0xe0000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY_NV 0x00000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV 0x20000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL_NV 0x40000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE_NV 0x60000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL_NV 0x80000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE_NV 0xa0000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY_NV 0xc0000000 +#define NV34TCL_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE_NV 0xe0000000 +#define NV34TCL_RC_CONSTANT_COLOR0(x) (0x00000908+((x)*32)) +#define NV34TCL_RC_CONSTANT_COLOR0__SIZE 0x00000008 +#define NV34TCL_RC_CONSTANT_COLOR0_B_SHIFT 0 +#define NV34TCL_RC_CONSTANT_COLOR0_B_MASK 0x000000ff +#define NV34TCL_RC_CONSTANT_COLOR0_G_SHIFT 8 +#define NV34TCL_RC_CONSTANT_COLOR0_G_MASK 0x0000ff00 +#define NV34TCL_RC_CONSTANT_COLOR0_R_SHIFT 16 +#define NV34TCL_RC_CONSTANT_COLOR0_R_MASK 0x00ff0000 +#define NV34TCL_RC_CONSTANT_COLOR0_A_SHIFT 24 +#define NV34TCL_RC_CONSTANT_COLOR0_A_MASK 0xff000000 +#define NV34TCL_RC_CONSTANT_COLOR1(x) (0x0000090c+((x)*32)) +#define NV34TCL_RC_CONSTANT_COLOR1__SIZE 0x00000008 +#define NV34TCL_RC_CONSTANT_COLOR1_B_SHIFT 0 +#define NV34TCL_RC_CONSTANT_COLOR1_B_MASK 0x000000ff +#define NV34TCL_RC_CONSTANT_COLOR1_G_SHIFT 8 +#define NV34TCL_RC_CONSTANT_COLOR1_G_MASK 0x0000ff00 +#define NV34TCL_RC_CONSTANT_COLOR1_R_SHIFT 16 +#define NV34TCL_RC_CONSTANT_COLOR1_R_MASK 0x00ff0000 +#define NV34TCL_RC_CONSTANT_COLOR1_A_SHIFT 24 +#define NV34TCL_RC_CONSTANT_COLOR1_A_MASK 0xff000000 +#define NV34TCL_RC_OUT_ALPHA(x) (0x00000910+((x)*32)) +#define NV34TCL_RC_OUT_ALPHA__SIZE 0x00000008 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SHIFT 0 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_MASK 0x0000000f +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV34TCL_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SHIFT 4 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_MASK 0x000000f0 +#define 
NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV34TCL_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SHIFT 8 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_MASK 0x00000f00 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV34TCL_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV34TCL_RC_OUT_ALPHA_CD_DOT_PRODUCT (1 << 12) +#define NV34TCL_RC_OUT_ALPHA_AB_DOT_PRODUCT (1 << 13) +#define NV34TCL_RC_OUT_ALPHA_MUX_SUM (1 << 14) +#define NV34TCL_RC_OUT_ALPHA_BIAS (1 << 15) +#define NV34TCL_RC_OUT_ALPHA_BIAS_NONE 0x00000000 +#define NV34TCL_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV34TCL_RC_OUT_ALPHA_SCALE_SHIFT 17 +#define NV34TCL_RC_OUT_ALPHA_SCALE_MASK 0x00000000 +#define NV34TCL_RC_OUT_ALPHA_SCALE_NONE 0x00000000 +#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV34TCL_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV34TCL_RC_OUT_RGB(x) (0x00000914+((x)*32)) +#define NV34TCL_RC_OUT_RGB__SIZE 0x00000008 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SHIFT 0 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_MASK 0x0000000f +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0_NV 0x00000001 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1_NV 0x00000002 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR_NV 0x00000004 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR_NV 0x00000005 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE0_ARB 0x00000008 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_TEXTURE1_ARB 0x00000009 +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_NV 0x0000000c +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE1_NV 0x0000000d +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x0000000e +#define NV34TCL_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F_NV 0x0000000f +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SHIFT 4 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_MASK 0x000000f0 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0_NV 0x00000010 +#define 
NV34TCL_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1_NV 0x00000020 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR_NV 0x00000040 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR_NV 0x00000050 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE0_ARB 0x00000080 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_TEXTURE1_ARB 0x00000090 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_NV 0x000000c0 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE1_NV 0x000000d0 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x000000e0 +#define NV34TCL_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F_NV 0x000000f0 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SHIFT 8 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_MASK 0x00000f00 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0_NV 0x00000100 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1_NV 0x00000200 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR_NV 0x00000400 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR_NV 0x00000500 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0_ARB 0x00000800 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1_ARB 0x00000900 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_NV 0x00000c00 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE1_NV 0x00000d00 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR_NV 0x00000e00 +#define NV34TCL_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F_NV 0x00000f00 +#define NV34TCL_RC_OUT_RGB_CD_DOT_PRODUCT (1 << 12) +#define NV34TCL_RC_OUT_RGB_AB_DOT_PRODUCT (1 << 13) +#define NV34TCL_RC_OUT_RGB_MUX_SUM (1 << 14) +#define NV34TCL_RC_OUT_RGB_BIAS (1 << 15) +#define NV34TCL_RC_OUT_RGB_BIAS_NONE 0x00000000 +#define NV34TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x00008000 +#define NV34TCL_RC_OUT_RGB_SCALE_SHIFT 17 +#define NV34TCL_RC_OUT_RGB_SCALE_MASK 0x00000000 +#define NV34TCL_RC_OUT_RGB_SCALE_NONE 0x00000000 +#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO_NV 0x00020000 +#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR_NV 0x00040000 +#define NV34TCL_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF_NV 0x00060000 +#define NV34TCL_VIEWPORT_HORIZ 0x00000a00 +#define NV34TCL_VIEWPORT_HORIZ_X_SHIFT 0 +#define NV34TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV34TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV34TCL_VIEWPORT_VERT 0x00000a04 +#define NV34TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV34TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV34TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV34TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x00000a10 +#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x00000a14 +#define NV34TCL_LIGHT_MODEL_FRONT_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x00000a18 +#define NV34TCL_VIEWPORT_TRANSLATE_X 0x00000a20 +#define NV34TCL_VIEWPORT_TRANSLATE_Y 0x00000a24 +#define NV34TCL_VIEWPORT_TRANSLATE_Z 0x00000a28 +#define NV34TCL_VIEWPORT_TRANSLATE_W 0x00000a2c +#define NV34TCL_VIEWPORT_SCALE_X 0x00000a30 +#define NV34TCL_VIEWPORT_SCALE_Y 0x00000a34 +#define NV34TCL_VIEWPORT_SCALE_Z 0x00000a38 +#define NV34TCL_VIEWPORT_SCALE_W 0x00000a3c +#define NV34TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 +#define NV34TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 +#define NV34TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 +#define NV34TCL_DEPTH_FUNC 0x00000a6c +#define NV34TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV34TCL_DEPTH_FUNC_LESS 0x00000201 +#define 
NV34TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV34TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV34TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV34TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV34TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV34TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV34TCL_DEPTH_WRITE_ENABLE 0x00000a70 +#define NV34TCL_DEPTH_TEST_ENABLE 0x00000a74 +#define NV34TCL_POLYGON_OFFSET_FACTOR 0x00000a78 +#define NV34TCL_POLYGON_OFFSET_UNITS 0x00000a7c +#define NV34TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8)) +#define NV34TCL_VTX_ATTR_3I_XY__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3I_XY_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_3I_XY_Y_SHIFT 16 +#define NV34TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8)) +#define NV34TCL_VTX_ATTR_3I_Z__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3I_Z_Z_SHIFT 0 +#define NV34TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff +#define NV34TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) +#define NV34TCL_VP_UPLOAD_INST__SIZE 0x00000004 +#define NV34TCL_TX0_CLIP_PLANE_A(x) (0x00000e00+((x)*16)) +#define NV34TCL_TX0_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX0_CLIP_PLANE_B(x) (0x00000e04+((x)*16)) +#define NV34TCL_TX0_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX0_CLIP_PLANE_C(x) (0x00000e08+((x)*16)) +#define NV34TCL_TX0_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX0_CLIP_PLANE_D(x) (0x00000e0c+((x)*16)) +#define NV34TCL_TX0_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX1_CLIP_PLANE_A(x) (0x00000e40+((x)*16)) +#define NV34TCL_TX1_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX1_CLIP_PLANE_B(x) (0x00000e44+((x)*16)) +#define NV34TCL_TX1_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX1_CLIP_PLANE_C(x) (0x00000e48+((x)*16)) +#define NV34TCL_TX1_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX1_CLIP_PLANE_D(x) (0x00000e4c+((x)*16)) +#define NV34TCL_TX1_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX2_CLIP_PLANE_A(x) (0x00000e80+((x)*16)) +#define NV34TCL_TX2_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX2_CLIP_PLANE_B(x) (0x00000e84+((x)*16)) +#define NV34TCL_TX2_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX2_CLIP_PLANE_C(x) (0x00000e88+((x)*16)) +#define NV34TCL_TX2_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX2_CLIP_PLANE_D(x) (0x00000e8c+((x)*16)) +#define NV34TCL_TX2_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX3_CLIP_PLANE_A(x) (0x00000ec0+((x)*16)) +#define NV34TCL_TX3_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX3_CLIP_PLANE_B(x) (0x00000ec4+((x)*16)) +#define NV34TCL_TX3_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX3_CLIP_PLANE_C(x) (0x00000ec8+((x)*16)) +#define NV34TCL_TX3_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX3_CLIP_PLANE_D(x) (0x00000ecc+((x)*16)) +#define NV34TCL_TX3_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX4_CLIP_PLANE_A(x) (0x00000f00+((x)*16)) +#define NV34TCL_TX4_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX4_CLIP_PLANE_B(x) (0x00000f04+((x)*16)) +#define NV34TCL_TX4_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX4_CLIP_PLANE_C(x) (0x00000f08+((x)*16)) +#define NV34TCL_TX4_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX4_CLIP_PLANE_D(x) (0x00000f0c+((x)*16)) +#define NV34TCL_TX4_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX5_CLIP_PLANE_A(x) (0x00000f40+((x)*16)) +#define NV34TCL_TX5_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX5_CLIP_PLANE_B(x) (0x00000f44+((x)*16)) +#define NV34TCL_TX5_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX5_CLIP_PLANE_C(x) 
(0x00000f48+((x)*16)) +#define NV34TCL_TX5_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX5_CLIP_PLANE_D(x) (0x00000f4c+((x)*16)) +#define NV34TCL_TX5_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX6_CLIP_PLANE_A(x) (0x00000f80+((x)*16)) +#define NV34TCL_TX6_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX6_CLIP_PLANE_B(x) (0x00000f84+((x)*16)) +#define NV34TCL_TX6_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX6_CLIP_PLANE_C(x) (0x00000f88+((x)*16)) +#define NV34TCL_TX6_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX6_CLIP_PLANE_D(x) (0x00000f8c+((x)*16)) +#define NV34TCL_TX6_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_TX7_CLIP_PLANE_A(x) (0x00000fc0+((x)*16)) +#define NV34TCL_TX7_CLIP_PLANE_A__SIZE 0x00000004 +#define NV34TCL_TX7_CLIP_PLANE_B(x) (0x00000fc4+((x)*16)) +#define NV34TCL_TX7_CLIP_PLANE_B__SIZE 0x00000004 +#define NV34TCL_TX7_CLIP_PLANE_C(x) (0x00000fc8+((x)*16)) +#define NV34TCL_TX7_CLIP_PLANE_C__SIZE 0x00000004 +#define NV34TCL_TX7_CLIP_PLANE_D(x) (0x00000fcc+((x)*16)) +#define NV34TCL_TX7_CLIP_PLANE_D__SIZE 0x00000004 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R(x) (0x00001000+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_R__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G(x) (0x00001004+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_G__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B(x) (0x00001008+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_AMBIENT_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R(x) (0x0000100c+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_R__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G(x) (0x00001010+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_G__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B(x) (0x00001014+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_DIFFUSE_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R(x) (0x00001018+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_R__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G(x) (0x0000101c+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_G__SIZE 0x00000008 +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B(x) (0x00001020+((x)*64)) +#define NV34TCL_LIGHT_FRONT_SIDE_PRODUCT_SPECULAR_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_HALF_VECTOR_X(x) (0x00001028+((x)*64)) +#define NV34TCL_LIGHT_HALF_VECTOR_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_HALF_VECTOR_Y(x) (0x0000102c+((x)*64)) +#define NV34TCL_LIGHT_HALF_VECTOR_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_HALF_VECTOR_Z(x) (0x00001030+((x)*64)) +#define NV34TCL_LIGHT_HALF_VECTOR_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_DIRECTION_X(x) (0x00001034+((x)*64)) +#define NV34TCL_LIGHT_DIRECTION_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_DIRECTION_Y(x) (0x00001038+((x)*64)) +#define NV34TCL_LIGHT_DIRECTION_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_DIRECTION_Z(x) (0x0000103c+((x)*64)) +#define NV34TCL_LIGHT_DIRECTION_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_A(x) (0x00001200+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_A__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_B(x) (0x00001204+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_B__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_C(x) (0x00001208+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_C__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_DIR_X(x) (0x0000120c+((x)*64)) +#define NV34TCL_LIGHT_SPOT_DIR_X__SIZE 0x00000008 +#define 
NV34TCL_LIGHT_SPOT_DIR_Y(x) (0x00001210+((x)*64)) +#define NV34TCL_LIGHT_SPOT_DIR_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_DIR_Z(x) (0x00001214+((x)*64)) +#define NV34TCL_LIGHT_SPOT_DIR_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_SPOT_CUTOFF_D(x) (0x00001218+((x)*64)) +#define NV34TCL_LIGHT_SPOT_CUTOFF_D__SIZE 0x00000008 +#define NV34TCL_LIGHT_POSITION_X(x) (0x0000121c+((x)*64)) +#define NV34TCL_LIGHT_POSITION_X__SIZE 0x00000008 +#define NV34TCL_LIGHT_POSITION_Y(x) (0x00001220+((x)*64)) +#define NV34TCL_LIGHT_POSITION_Y__SIZE 0x00000008 +#define NV34TCL_LIGHT_POSITION_Z(x) (0x00001224+((x)*64)) +#define NV34TCL_LIGHT_POSITION_Z__SIZE 0x00000008 +#define NV34TCL_LIGHT_ATTENUATION_CONSTANT(x) (0x00001228+((x)*64)) +#define NV34TCL_LIGHT_ATTENUATION_CONSTANT__SIZE 0x00000008 +#define NV34TCL_LIGHT_ATTENUATION_LINEAR(x) (0x0000122c+((x)*64)) +#define NV34TCL_LIGHT_ATTENUATION_LINEAR__SIZE 0x00000008 +#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC(x) (0x00001230+((x)*64)) +#define NV34TCL_LIGHT_ATTENUATION_QUADRATIC__SIZE 0x00000008 +#define NV34TCL_FRONT_MATERIAL_SHININESS(x) (0x00001400+((x)*4)) +#define NV34TCL_FRONT_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV34TCL_ENABLED_LIGHTS 0x00001420 +#define NV34TCL_FP_REG_CONTROL 0x00001450 +#define NV34TCL_FP_REG_CONTROL_UNK1_SHIFT 16 +#define NV34TCL_FP_REG_CONTROL_UNK1_MASK 0xffff0000 +#define NV34TCL_FP_REG_CONTROL_UNK0_SHIFT 0 +#define NV34TCL_FP_REG_CONTROL_UNK0_MASK 0x0000ffff +#define NV34TCL_VP_CLIP_PLANES_ENABLE 0x00001478 +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 (1 << 1) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 (1 << 5) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 (1 << 9) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 (1 << 13) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 (1 << 17) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5 (1 << 21) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE6 (1 << 25) +#define NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE7 (1 << 29) +#define NV34TCL_POLYGON_STIPPLE_ENABLE 0x0000147c +#define NV34TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) +#define NV34TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV34TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16)) +#define NV34TCL_VTX_ATTR_3F_X__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16)) +#define NV34TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16)) +#define NV34TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 +#define NV34TCL_VP_CLIP_PLANE_A(x) (0x00001600+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_A__SIZE 0x00000006 +#define NV34TCL_VP_CLIP_PLANE_B(x) (0x00001604+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_B__SIZE 0x00000006 +#define NV34TCL_VP_CLIP_PLANE_C(x) (0x00001608+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_C__SIZE 0x00000006 +#define NV34TCL_VP_CLIP_PLANE_D(x) (0x0000160c+((x)*16)) +#define NV34TCL_VP_CLIP_PLANE_D__SIZE 0x00000006 +#define NV34TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4)) +#define NV34TCL_VTXBUF_ADDRESS__SIZE 0x00000010 +#define NV34TCL_VTXBUF_ADDRESS_DMA1 (1 << 31) +#define NV34TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0 +#define NV34TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff +#define NV34TCL_VTXFMT(x) (0x00001740+((x)*4)) +#define NV34TCL_VTXFMT__SIZE 0x00000010 +#define NV34TCL_VTXFMT_TYPE_SHIFT 0 +#define NV34TCL_VTXFMT_TYPE_MASK 0x0000000f +#define NV34TCL_VTXFMT_TYPE_FLOAT 0x00000002 +#define NV34TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define NV34TCL_VTXFMT_TYPE_USHORT 0x00000005 +#define NV34TCL_VTXFMT_SIZE_SHIFT 4 +#define NV34TCL_VTXFMT_SIZE_MASK 0x000000f0 +#define NV34TCL_VTXFMT_STRIDE_SHIFT 8 
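
Note: the NV34TCL_VTXFMT word above is composed from its TYPE, SIZE and STRIDE fields with the usual shift/mask pattern used throughout this header. A minimal sketch of how a driver could pack such a word from these defines follows; the helper name and the example attribute values are illustrative assumptions, not part of the diff.

    #include <stdint.h>

    /* Hypothetical helper: pack one NV34TCL_VTXFMT word for a vertex
     * attribute, using the shift defines above (TYPE at bit 0, SIZE at
     * bit 4, STRIDE at bit 8). */
    static inline uint32_t
    nv34_pack_vtxfmt(uint32_t type, uint32_t ncomp, uint32_t stride)
    {
       return (type   << NV34TCL_VTXFMT_TYPE_SHIFT) |
              (ncomp  << NV34TCL_VTXFMT_SIZE_SHIFT) |
              (stride << NV34TCL_VTXFMT_STRIDE_SHIFT);
    }

    /* e.g. nv34_pack_vtxfmt(NV34TCL_VTXFMT_TYPE_FLOAT, 3, 12) == 0x00000c32 */
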
+#define NV34TCL_VTXFMT_STRIDE_MASK 0x0000ff00 +#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_R 0x000017a0 +#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_G 0x000017a4 +#define NV34TCL_LIGHT_MODEL_BACK_SIDE_PRODUCT_AMBIENT_PLUS_EMISSION_B 0x000017a8 +#define NV34TCL_COLOR_MATERIAL_BACK_R 0x000017b0 +#define NV34TCL_COLOR_MATERIAL_BACK_G 0x000017b4 +#define NV34TCL_COLOR_MATERIAL_BACK_B 0x000017b8 +#define NV34TCL_COLOR_MATERIAL_BACK_A 0x000017c0 +#define NV34TCL_QUERY_RESET 0x000017c8 +#define NV34TCL_QUERY_UNK17CC 0x000017cc +#define NV34TCL_QUERY_GET 0x00001800 +#define NV34TCL_QUERY_GET_UNK24_SHIFT 24 +#define NV34TCL_QUERY_GET_UNK24_MASK 0xff000000 +#define NV34TCL_QUERY_GET_OFFSET_SHIFT 0 +#define NV34TCL_QUERY_GET_OFFSET_MASK 0x00ffffff +#define NV34TCL_VERTEX_BEGIN_END 0x00001808 +#define NV34TCL_VERTEX_BEGIN_END_STOP 0x00000000 +#define NV34TCL_VERTEX_BEGIN_END_POINTS 0x00000001 +#define NV34TCL_VERTEX_BEGIN_END_LINES 0x00000002 +#define NV34TCL_VERTEX_BEGIN_END_LINE_LOOP 0x00000003 +#define NV34TCL_VERTEX_BEGIN_END_LINE_STRIP 0x00000004 +#define NV34TCL_VERTEX_BEGIN_END_TRIANGLES 0x00000005 +#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV34TCL_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV34TCL_VERTEX_BEGIN_END_QUADS 0x00000008 +#define NV34TCL_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV34TCL_VERTEX_BEGIN_END_POLYGON 0x0000000a +#define NV34TCL_VB_ELEMENT_U16 0x0000180c +#define NV34TCL_VB_ELEMENT_U16_I0_SHIFT 0 +#define NV34TCL_VB_ELEMENT_U16_I0_MASK 0x0000ffff +#define NV34TCL_VB_ELEMENT_U16_I1_SHIFT 16 +#define NV34TCL_VB_ELEMENT_U16_I1_MASK 0xffff0000 +#define NV34TCL_VB_ELEMENT_U32 0x00001810 +#define NV34TCL_VB_VERTEX_BATCH 0x00001814 +#define NV34TCL_VB_VERTEX_BATCH_OFFSET_SHIFT 0 +#define NV34TCL_VB_VERTEX_BATCH_OFFSET_MASK 0x00ffffff +#define NV34TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 +#define NV34TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 +#define NV34TCL_VERTEX_DATA 0x00001818 +#define NV34TCL_IDXBUF_ADDRESS 0x0000181c +#define NV34TCL_IDXBUF_FORMAT 0x00001820 +#define NV34TCL_IDXBUF_FORMAT_TYPE_SHIFT 4 +#define NV34TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0 +#define NV34TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000 +#define NV34TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010 +#define NV34TCL_IDXBUF_FORMAT_DMA1 (1 << 0) +#define NV34TCL_VB_INDEX_BATCH 0x00001824 +#define NV34TCL_VB_INDEX_BATCH_COUNT_SHIFT 24 +#define NV34TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000 +#define NV34TCL_VB_INDEX_BATCH_START_SHIFT 0 +#define NV34TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff +#define NV34TCL_POLYGON_MODE_FRONT 0x00001828 +#define NV34TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV34TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV34TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV34TCL_POLYGON_MODE_BACK 0x0000182c +#define NV34TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV34TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV34TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV34TCL_CULL_FACE 0x00001830 +#define NV34TCL_CULL_FACE_FRONT 0x00000404 +#define NV34TCL_CULL_FACE_BACK 0x00000405 +#define NV34TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV34TCL_FRONT_FACE 0x00001834 +#define NV34TCL_FRONT_FACE_CW 0x00000900 +#define NV34TCL_FRONT_FACE_CCW 0x00000901 +#define NV34TCL_POLYGON_SMOOTH_ENABLE 0x00001838 +#define NV34TCL_CULL_FACE_ENABLE 0x0000183c +#define NV34TCL_TX_PALETTE_OFFSET(x) (0x00001840+((x)*4)) +#define NV34TCL_TX_PALETTE_OFFSET__SIZE 0x00000004 +#define NV34TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8)) +#define 
NV34TCL_VTX_ATTR_2F_X__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8)) +#define NV34TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4)) +#define NV34TCL_VTX_ATTR_2I__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_2I_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_2I_X_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV34TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4)) +#define NV34TCL_VTX_ATTR_4UB__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4UB_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_4UB_X_MASK 0x000000ff +#define NV34TCL_VTX_ATTR_4UB_Y_SHIFT 8 +#define NV34TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00 +#define NV34TCL_VTX_ATTR_4UB_Z_SHIFT 16 +#define NV34TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000 +#define NV34TCL_VTX_ATTR_4UB_W_SHIFT 24 +#define NV34TCL_VTX_ATTR_4UB_W_MASK 0xff000000 +#define NV34TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8)) +#define NV34TCL_VTX_ATTR_4I_XY__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4I_XY_X_SHIFT 0 +#define NV34TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_4I_XY_Y_SHIFT 16 +#define NV34TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000 +#define NV34TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8)) +#define NV34TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0 +#define NV34TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff +#define NV34TCL_VTX_ATTR_4I_ZW_W_SHIFT 16 +#define NV34TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000 +#define NV34TCL_TX_OFFSET(x) (0x00001a00+((x)*32)) +#define NV34TCL_TX_OFFSET__SIZE 0x00000004 +#define NV34TCL_TX_FORMAT(x) (0x00001a04+((x)*32)) +#define NV34TCL_TX_FORMAT__SIZE 0x00000004 +#define NV34TCL_TX_FORMAT_DMA0 (1 << 0) +#define NV34TCL_TX_FORMAT_DMA1 (1 << 1) +#define NV34TCL_TX_FORMAT_CUBIC (1 << 2) +#define NV34TCL_TX_FORMAT_NO_BORDER (1 << 3) +#define NV34TCL_TX_FORMAT_DIMS_SHIFT 4 +#define NV34TCL_TX_FORMAT_DIMS_MASK 0x000000f0 +#define NV34TCL_TX_FORMAT_DIMS_1D 0x00000010 +#define NV34TCL_TX_FORMAT_DIMS_2D 0x00000020 +#define NV34TCL_TX_FORMAT_DIMS_3D 0x00000030 +#define NV34TCL_TX_FORMAT_FORMAT_SHIFT 8 +#define NV34TCL_TX_FORMAT_FORMAT_MASK 0x0000ff00 +#define NV34TCL_TX_FORMAT_FORMAT_L8 0x00000000 +#define NV34TCL_TX_FORMAT_FORMAT_A8 0x00000100 +#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT 0x00000300 +#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4 0x00000400 +#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5 0x00000500 +#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8 0x00000600 +#define NV34TCL_TX_FORMAT_FORMAT_X8R8G8B8 0x00000700 +#define NV34TCL_TX_FORMAT_FORMAT_INDEX8 0x00000b00 +#define NV34TCL_TX_FORMAT_FORMAT_DXT1 0x00000c00 +#define NV34TCL_TX_FORMAT_FORMAT_DXT3 0x00000e00 +#define NV34TCL_TX_FORMAT_FORMAT_DXT5 0x00000f00 +#define NV34TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000 +#define NV34TCL_TX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100 +#define NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200 +#define NV34TCL_TX_FORMAT_FORMAT_L8_RECT 0x00001300 +#define NV34TCL_TX_FORMAT_FORMAT_A8L8 0x00001a00 +#define NV34TCL_TX_FORMAT_FORMAT_A8_RECT2 0x00001b00 +#define NV34TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00 +#define NV34TCL_TX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00 +#define NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT 0x00002000 +#define NV34TCL_TX_FORMAT_FORMAT_DSDT 0x00002800 +#define NV34TCL_TX_FORMAT_FORMAT_A16 0x00003200 +#define NV34TCL_TX_FORMAT_FORMAT_HILO16 0x00003300 +#define NV34TCL_TX_FORMAT_FORMAT_A16_RECT 0x00003500 +#define NV34TCL_TX_FORMAT_FORMAT_HILO16_RECT 0x00003600 +#define NV34TCL_TX_FORMAT_FORMAT_HILO8 
0x00004400 +#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500 +#define NV34TCL_TX_FORMAT_FORMAT_HILO8_RECT 0x00004600 +#define NV34TCL_TX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700 +#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA16_NV 0x00004a00 +#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_RGBA32_NV 0x00004b00 +#define NV34TCL_TX_FORMAT_FORMAT_FLOAT_R32_NV 0x00004c00 +#define NV34TCL_TX_FORMAT_MIPMAP (1 << 19) +#define NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT 20 +#define NV34TCL_TX_FORMAT_BASE_SIZE_U_MASK 0x00f00000 +#define NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT 24 +#define NV34TCL_TX_FORMAT_BASE_SIZE_V_MASK 0x0f000000 +#define NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT 28 +#define NV34TCL_TX_FORMAT_BASE_SIZE_W_MASK 0xf0000000 +#define NV34TCL_TX_WRAP(x) (0x00001a08+((x)*32)) +#define NV34TCL_TX_WRAP__SIZE 0x00000004 +#define NV34TCL_TX_WRAP_S_SHIFT 0 +#define NV34TCL_TX_WRAP_S_MASK 0x000000ff +#define NV34TCL_TX_WRAP_S_REPEAT 0x00000001 +#define NV34TCL_TX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV34TCL_TX_WRAP_S_CLAMP 0x00000005 +#define NV34TCL_TX_WRAP_T_SHIFT 8 +#define NV34TCL_TX_WRAP_T_MASK 0x00000f00 +#define NV34TCL_TX_WRAP_T_REPEAT 0x00000100 +#define NV34TCL_TX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV34TCL_TX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV34TCL_TX_WRAP_T_CLAMP 0x00000500 +#define NV34TCL_TX_WRAP_EXPAND_NORMAL_SHIFT 12 +#define NV34TCL_TX_WRAP_EXPAND_NORMAL_MASK 0x0000f000 +#define NV34TCL_TX_WRAP_R_SHIFT 16 +#define NV34TCL_TX_WRAP_R_MASK 0x000f0000 +#define NV34TCL_TX_WRAP_R_REPEAT 0x00010000 +#define NV34TCL_TX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV34TCL_TX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV34TCL_TX_WRAP_R_CLAMP 0x00050000 +#define NV34TCL_TX_WRAP_RCOMP_SHIFT 28 +#define NV34TCL_TX_WRAP_RCOMP_MASK 0xf0000000 +#define NV34TCL_TX_WRAP_RCOMP_NEVER 0x00000000 +#define NV34TCL_TX_WRAP_RCOMP_GREATER 0x10000000 +#define NV34TCL_TX_WRAP_RCOMP_EQUAL 0x20000000 +#define NV34TCL_TX_WRAP_RCOMP_GEQUAL 0x30000000 +#define NV34TCL_TX_WRAP_RCOMP_LESS 0x40000000 +#define NV34TCL_TX_WRAP_RCOMP_NOTEQUAL 0x50000000 +#define NV34TCL_TX_WRAP_RCOMP_LEQUAL 0x60000000 +#define NV34TCL_TX_WRAP_RCOMP_ALWAYS 0x70000000 +#define NV34TCL_TX_ENABLE(x) (0x00001a0c+((x)*32)) +#define NV34TCL_TX_ENABLE__SIZE 0x00000004 +#define NV34TCL_TX_ENABLE_ANISO_SHIFT 4 +#define NV34TCL_TX_ENABLE_ANISO_MASK 0x00000030 +#define NV34TCL_TX_ENABLE_ANISO_NONE 0x00000000 +#define NV34TCL_TX_ENABLE_ANISO_2X 0x00000010 +#define NV34TCL_TX_ENABLE_ANISO_4X 0x00000020 +#define NV34TCL_TX_ENABLE_ANISO_8X 0x00000030 +#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT 14 +#define NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_MASK 0x0003c000 +#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT 26 +#define NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_MASK 0x3c000000 +#define NV34TCL_TX_ENABLE_ENABLE (1 << 30) +#define NV34TCL_TX_SWIZZLE(x) (0x00001a10+((x)*32)) +#define NV34TCL_TX_SWIZZLE__SIZE 0x00000004 +#define NV34TCL_TX_SWIZZLE_S0_X_SHIFT 14 +#define NV34TCL_TX_SWIZZLE_S0_X_MASK 0x0000c000 +#define NV34TCL_TX_SWIZZLE_S0_X_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_X_ONE 0x00004000 +#define NV34TCL_TX_SWIZZLE_S0_X_S1 0x00008000 +#define NV34TCL_TX_SWIZZLE_S0_Y_SHIFT 12 +#define NV34TCL_TX_SWIZZLE_S0_Y_MASK 0x00003000 +#define NV34TCL_TX_SWIZZLE_S0_Y_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_Y_ONE 0x00001000 +#define 
NV34TCL_TX_SWIZZLE_S0_Y_S1 0x00002000 +#define NV34TCL_TX_SWIZZLE_S0_Z_SHIFT 10 +#define NV34TCL_TX_SWIZZLE_S0_Z_MASK 0x00000c00 +#define NV34TCL_TX_SWIZZLE_S0_Z_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_Z_ONE 0x00000400 +#define NV34TCL_TX_SWIZZLE_S0_Z_S1 0x00000800 +#define NV34TCL_TX_SWIZZLE_S0_W_SHIFT 8 +#define NV34TCL_TX_SWIZZLE_S0_W_MASK 0x00000300 +#define NV34TCL_TX_SWIZZLE_S0_W_ZERO 0x00000000 +#define NV34TCL_TX_SWIZZLE_S0_W_ONE 0x00000100 +#define NV34TCL_TX_SWIZZLE_S0_W_S1 0x00000200 +#define NV34TCL_TX_SWIZZLE_S1_X_SHIFT 6 +#define NV34TCL_TX_SWIZZLE_S1_X_MASK 0x000000c0 +#define NV34TCL_TX_SWIZZLE_S1_X_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_X_Z 0x00000040 +#define NV34TCL_TX_SWIZZLE_S1_X_Y 0x00000080 +#define NV34TCL_TX_SWIZZLE_S1_X_X 0x000000c0 +#define NV34TCL_TX_SWIZZLE_S1_Y_SHIFT 4 +#define NV34TCL_TX_SWIZZLE_S1_Y_MASK 0x00000030 +#define NV34TCL_TX_SWIZZLE_S1_Y_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_Y_Z 0x00000010 +#define NV34TCL_TX_SWIZZLE_S1_Y_Y 0x00000020 +#define NV34TCL_TX_SWIZZLE_S1_Y_X 0x00000030 +#define NV34TCL_TX_SWIZZLE_S1_Z_SHIFT 2 +#define NV34TCL_TX_SWIZZLE_S1_Z_MASK 0x0000000c +#define NV34TCL_TX_SWIZZLE_S1_Z_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_Z_Z 0x00000004 +#define NV34TCL_TX_SWIZZLE_S1_Z_Y 0x00000008 +#define NV34TCL_TX_SWIZZLE_S1_Z_X 0x0000000c +#define NV34TCL_TX_SWIZZLE_S1_W_SHIFT 0 +#define NV34TCL_TX_SWIZZLE_S1_W_MASK 0x00000003 +#define NV34TCL_TX_SWIZZLE_S1_W_W 0x00000000 +#define NV34TCL_TX_SWIZZLE_S1_W_Z 0x00000001 +#define NV34TCL_TX_SWIZZLE_S1_W_Y 0x00000002 +#define NV34TCL_TX_SWIZZLE_S1_W_X 0x00000003 +#define NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT 16 +#define NV34TCL_TX_SWIZZLE_RECT_PITCH_MASK 0xffff0000 +#define NV34TCL_TX_FILTER(x) (0x00001a14+((x)*32)) +#define NV34TCL_TX_FILTER__SIZE 0x00000004 +#define NV34TCL_TX_FILTER_LOD_BIAS_SHIFT 8 +#define NV34TCL_TX_FILTER_LOD_BIAS_MASK 0x00000f00 +#define NV34TCL_TX_FILTER_MINIFY_SHIFT 16 +#define NV34TCL_TX_FILTER_MINIFY_MASK 0x000f0000 +#define NV34TCL_TX_FILTER_MINIFY_NEAREST 0x00010000 +#define NV34TCL_TX_FILTER_MINIFY_LINEAR 0x00020000 +#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV34TCL_TX_FILTER_MAGNIFY_SHIFT 24 +#define NV34TCL_TX_FILTER_MAGNIFY_MASK 0x0f000000 +#define NV34TCL_TX_FILTER_MAGNIFY_NEAREST 0x01000000 +#define NV34TCL_TX_FILTER_MAGNIFY_LINEAR 0x02000000 +#define NV34TCL_TX_FILTER_SIGNED_BLUE (1 << 28) +#define NV34TCL_TX_FILTER_SIGNED_GREEN (1 << 29) +#define NV34TCL_TX_FILTER_SIGNED_RED (1 << 30) +#define NV34TCL_TX_FILTER_SIGNED_ALPHA (1 << 31) +#define NV34TCL_TX_NPOT_SIZE(x) (0x00001a18+((x)*32)) +#define NV34TCL_TX_NPOT_SIZE__SIZE 0x00000004 +#define NV34TCL_TX_NPOT_SIZE_H_SHIFT 0 +#define NV34TCL_TX_NPOT_SIZE_H_MASK 0x0000ffff +#define NV34TCL_TX_NPOT_SIZE_W_SHIFT 16 +#define NV34TCL_TX_NPOT_SIZE_W_MASK 0xffff0000 +#define NV34TCL_TX_BORDER_COLOR(x) (0x00001a1c+((x)*32)) +#define NV34TCL_TX_BORDER_COLOR__SIZE 0x00000004 +#define NV34TCL_TX_BORDER_COLOR_B_SHIFT 0 +#define NV34TCL_TX_BORDER_COLOR_B_MASK 0x000000ff +#define NV34TCL_TX_BORDER_COLOR_G_SHIFT 8 +#define NV34TCL_TX_BORDER_COLOR_G_MASK 0x0000ff00 +#define NV34TCL_TX_BORDER_COLOR_R_SHIFT 16 +#define NV34TCL_TX_BORDER_COLOR_R_MASK 0x00ff0000 +#define NV34TCL_TX_BORDER_COLOR_A_SHIFT 24 +#define NV34TCL_TX_BORDER_COLOR_A_MASK 0xff000000 +#define 
NV34TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV34TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) +#define NV34TCL_VTX_ATTR_4F_W__SIZE 0x00000010 +#define NV34TCL_FP_CONTROL 0x00001d60 +#define NV34TCL_FP_CONTROL_USES_KIL (1 << 7) +#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_SHIFT 0 +#define NV34TCL_FP_CONTROL_USED_REGS_MINUS1_DIV2_MASK 0x0000000f +#define NV34TCL_DEPTH_UNK17D8 0x00001d78 +#define NV34TCL_DEPTH_UNK17D8_CLAMP_SHIFT 4 +#define NV34TCL_DEPTH_UNK17D8_CLAMP_MASK 0x000000f0 +#define NV34TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV34TCL_MULTISAMPLE_CONTROL_ENABLE (1 << 0) +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_COVERAGE (1 << 4) +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_ALPHA_TO_ONE (1 << 8) +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_SHIFT 16 +#define NV34TCL_MULTISAMPLE_CONTROL_SAMPLE_COVERAGE_MASK 0xffff0000 +#define NV34TCL_CLEAR_DEPTH_VALUE 0x00001d8c +#define NV34TCL_CLEAR_COLOR_VALUE 0x00001d90 +#define NV34TCL_CLEAR_COLOR_VALUE_B_SHIFT 0 +#define NV34TCL_CLEAR_COLOR_VALUE_B_MASK 0x000000ff +#define NV34TCL_CLEAR_COLOR_VALUE_G_SHIFT 8 +#define NV34TCL_CLEAR_COLOR_VALUE_G_MASK 0x0000ff00 +#define NV34TCL_CLEAR_COLOR_VALUE_R_SHIFT 16 +#define NV34TCL_CLEAR_COLOR_VALUE_R_MASK 0x00ff0000 +#define NV34TCL_CLEAR_COLOR_VALUE_A_SHIFT 24 +#define NV34TCL_CLEAR_COLOR_VALUE_A_MASK 0xff000000 +#define NV34TCL_CLEAR_BUFFERS 0x00001d94 +#define NV34TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) +#define NV34TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) +#define NV34TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) +#define NV34TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) +#define NV34TCL_CLEAR_BUFFERS_STENCIL (1 << 1) +#define NV34TCL_CLEAR_BUFFERS_DEPTH (1 << 0) +#define NV34TCL_DO_VERTICES 0x00001dac +#define NV34TCL_LINE_STIPPLE_ENABLE 0x00001db4 +#define NV34TCL_LINE_STIPPLE_PATTERN 0x00001db8 +#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0 +#define NV34TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff +#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 +#define NV34TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 +#define NV34TCL_BACK_MATERIAL_SHININESS(x) (0x00001e20+((x)*4)) +#define NV34TCL_BACK_MATERIAL_SHININESS__SIZE 0x00000006 +#define NV34TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4)) +#define NV34TCL_VTX_ATTR_1F__SIZE 0x00000010 +#define NV34TCL_ENGINE 0x00001e94 +#define NV34TCL_ENGINE_FP (1 << 0) +#define NV34TCL_ENGINE_VP (1 << 1) +#define NV34TCL_ENGINE_FIXED (1 << 2) +#define NV34TCL_VP_UPLOAD_FROM_ID 0x00001e9c +#define NV34TCL_VP_START_FROM_ID 0x00001ea0 +#define NV34TCL_POINT_PARAMETERS(x) (0x00001ec0+((x)*4)) +#define NV34TCL_POINT_PARAMETERS__SIZE 0x00000008 +#define NV34TCL_POINT_SIZE 0x00001ee0 +#define NV34TCL_POINT_PARAMETERS_ENABLE 0x00001ee4 +#define NV34TCL_POINT_SPRITE 0x00001ee8 +#define NV34TCL_POINT_SPRITE_ENABLE (1 << 0) +#define NV34TCL_POINT_SPRITE_R_MODE_SHIFT 1 +#define NV34TCL_POINT_SPRITE_R_MODE_MASK 0x00000006 +#define NV34TCL_POINT_SPRITE_R_MODE_ZERO 0x00000000 +#define NV34TCL_POINT_SPRITE_R_MODE_R 0x00000002 +#define NV34TCL_POINT_SPRITE_R_MODE_S 0x00000004 +#define NV34TCL_POINT_SPRITE_COORD_REPLACE (1 << 11) +#define NV34TCL_VP_UPLOAD_CONST_ID 0x00001efc +#define NV34TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004 +#define NV34TCL_VP_UPLOAD_CONST_Y(x) 
(0x00001f04+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004 +#define NV34TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004 +#define NV34TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16)) +#define NV34TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 +#define NV34TCL_UNK1f80(x) (0x00001f80+((x)*4)) +#define NV34TCL_UNK1f80__SIZE 0x00000010 + + +#define NV40_CONTEXT_SURFACES_2D 0x00003062 + + + +#define NV40_STRETCHED_IMAGE_FROM_CPU 0x00003066 + + + +#define NV40_TEXTURE_FROM_CPU 0x0000307b + + + +#define NV40_SCALED_IMAGE_FROM_MEMORY 0x00003089 + + + +#define NV40_IMAGE_FROM_CPU 0x0000308a + + + +#define NV40_SWIZZLED_SURFACE 0x0000309e + + + +#define NV40TCL 0x00004097 + +#define NV40TCL_REF_CNT 0x00000050 +#define NV40TCL_NOP 0x00000100 +#define NV40TCL_NOTIFY 0x00000104 +#define NV40TCL_DMA_NOTIFY 0x00000180 +#define NV40TCL_DMA_TEXTURE0 0x00000184 +#define NV40TCL_DMA_TEXTURE1 0x00000188 +#define NV40TCL_DMA_COLOR1 0x0000018c +#define NV40TCL_DMA_COLOR0 0x00000194 +#define NV40TCL_DMA_ZETA 0x00000198 +#define NV40TCL_DMA_VTXBUF0 0x0000019c +#define NV40TCL_DMA_VTXBUF1 0x000001a0 +#define NV40TCL_DMA_FENCE 0x000001a4 +#define NV40TCL_DMA_QUERY 0x000001a8 +#define NV40TCL_DMA_UNK01AC 0x000001ac +#define NV40TCL_DMA_UNK01B0 0x000001b0 +#define NV40TCL_DMA_COLOR2 0x000001b4 +#define NV40TCL_DMA_COLOR3 0x000001b8 +#define NV40TCL_RT_HORIZ 0x00000200 +#define NV40TCL_RT_HORIZ_W_SHIFT 16 +#define NV40TCL_RT_HORIZ_W_MASK 0xffff0000 +#define NV40TCL_RT_HORIZ_X_SHIFT 0 +#define NV40TCL_RT_HORIZ_X_MASK 0x0000ffff +#define NV40TCL_RT_VERT 0x00000204 +#define NV40TCL_RT_VERT_H_SHIFT 16 +#define NV40TCL_RT_VERT_H_MASK 0xffff0000 +#define NV40TCL_RT_VERT_Y_SHIFT 0 +#define NV40TCL_RT_VERT_Y_MASK 0x0000ffff +#define NV40TCL_RT_FORMAT 0x00000208 +#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT 24 +#define NV40TCL_RT_FORMAT_LOG2_HEIGHT_MASK 0xff000000 +#define NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT 16 +#define NV40TCL_RT_FORMAT_LOG2_WIDTH_MASK 0x00ff0000 +#define NV40TCL_RT_FORMAT_TYPE_SHIFT 8 +#define NV40TCL_RT_FORMAT_TYPE_MASK 0x00000f00 +#define NV40TCL_RT_FORMAT_TYPE_LINEAR 0x00000100 +#define NV40TCL_RT_FORMAT_TYPE_SWIZZLED 0x00000200 +#define NV40TCL_RT_FORMAT_ZETA_SHIFT 5 +#define NV40TCL_RT_FORMAT_ZETA_MASK 0x000000e0 +#define NV40TCL_RT_FORMAT_ZETA_Z16 0x00000020 +#define NV40TCL_RT_FORMAT_ZETA_Z24S8 0x00000040 +#define NV40TCL_RT_FORMAT_COLOR_SHIFT 0 +#define NV40TCL_RT_FORMAT_COLOR_MASK 0x0000001f +#define NV40TCL_RT_FORMAT_COLOR_R5G6B5 0x00000003 +#define NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 0x00000005 +#define NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 0x00000008 +#define NV40TCL_RT_FORMAT_COLOR_B8 0x00000009 +#define NV40TCL_RT_FORMAT_COLOR_UNKNOWN 0x0000000d +#define NV40TCL_RT_FORMAT_COLOR_X8B8G8R8 0x0000000f +#define NV40TCL_RT_FORMAT_COLOR_A8B8G8R8 0x00000010 +#define NV40TCL_COLOR0_PITCH 0x0000020c +#define NV40TCL_COLOR0_OFFSET 0x00000210 +#define NV40TCL_ZETA_OFFSET 0x00000214 +#define NV40TCL_COLOR1_OFFSET 0x00000218 +#define NV40TCL_COLOR1_PITCH 0x0000021c +#define NV40TCL_RT_ENABLE 0x00000220 +#define NV40TCL_RT_ENABLE_MRT (1 << 4) +#define NV40TCL_RT_ENABLE_COLOR3 (1 << 3) +#define NV40TCL_RT_ENABLE_COLOR2 (1 << 2) +#define NV40TCL_RT_ENABLE_COLOR1 (1 << 1) +#define NV40TCL_RT_ENABLE_COLOR0 (1 << 0) +#define NV40TCL_ZETA_PITCH 0x0000022c +#define NV40TCL_COLOR2_PITCH 0x00000280 +#define NV40TCL_COLOR3_PITCH 0x00000284 +#define NV40TCL_COLOR2_OFFSET 0x00000288 +#define NV40TCL_COLOR3_OFFSET 0x0000028c +#define NV40TCL_VIEWPORT_CLIP_HORIZ(x) 
(0x000002c0+((x)*8)) +#define NV40TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV40TCL_VIEWPORT_CLIP_VERT(x) (0x000002c4+((x)*8)) +#define NV40TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV40TCL_DITHER_ENABLE 0x00000300 +#define NV40TCL_ALPHA_TEST_ENABLE 0x00000304 +#define NV40TCL_ALPHA_TEST_FUNC 0x00000308 +#define NV40TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV40TCL_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV40TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV40TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV40TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV40TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV40TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 +#define NV40TCL_ALPHA_TEST_REF 0x0000030c +#define NV40TCL_BLEND_ENABLE 0x00000310 +#define NV40TCL_BLEND_FUNC_SRC 0x00000314 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SHIFT 0 +#define NV40TCL_BLEND_FUNC_SRC_RGB_MASK 0x0000ffff +#define NV40TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV40TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV40TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV40TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV40TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SHIFT 16 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_MASK 0xffff0000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00010000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x03000000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x03020000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x03040000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x03060000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV40TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV40TCL_BLEND_FUNC_DST 0x00000318 +#define NV40TCL_BLEND_FUNC_DST_RGB_SHIFT 0 +#define NV40TCL_BLEND_FUNC_DST_RGB_MASK 0x0000ffff +#define NV40TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV40TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define 
NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV40TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV40TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV40TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV40TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SHIFT 16 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_MASK 0xffff0000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00010000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x03000000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x03010000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x03020000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x03030000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x03040000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x03050000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x03060000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x03070000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x03080000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x80010000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x80020000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x80030000 +#define NV40TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x80040000 +#define NV40TCL_BLEND_COLOR 0x0000031c +#define NV40TCL_BLEND_EQUATION 0x00000320 +#define NV40TCL_BLEND_EQUATION_RGB_SHIFT 0 +#define NV40TCL_BLEND_EQUATION_RGB_MASK 0x0000ffff +#define NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV40TCL_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV40TCL_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV40TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV40TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV40TCL_BLEND_EQUATION_ALPHA_SHIFT 16 +#define NV40TCL_BLEND_EQUATION_ALPHA_MASK 0xffff0000 +#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x80060000 +#define NV40TCL_BLEND_EQUATION_ALPHA_MIN 0x80070000 +#define NV40TCL_BLEND_EQUATION_ALPHA_MAX 0x80080000 +#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x800a0000 +#define NV40TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x800b0000 +#define NV40TCL_COLOR_MASK 0x00000324 +#define NV40TCL_COLOR_MASK_BUFFER0_B_SHIFT 0 +#define NV40TCL_COLOR_MASK_BUFFER0_B_MASK 0x000000ff +#define NV40TCL_COLOR_MASK_BUFFER0_G_SHIFT 8 +#define NV40TCL_COLOR_MASK_BUFFER0_G_MASK 0x0000ff00 +#define NV40TCL_COLOR_MASK_BUFFER0_R_SHIFT 16 +#define NV40TCL_COLOR_MASK_BUFFER0_R_MASK 0x00ff0000 +#define NV40TCL_COLOR_MASK_BUFFER0_A_SHIFT 24 +#define NV40TCL_COLOR_MASK_BUFFER0_A_MASK 0xff000000 +#define NV40TCL_STENCIL_FRONT_ENABLE 0x00000328 +#define NV40TCL_STENCIL_FRONT_MASK 0x0000032c +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC 0x00000330 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV40TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 
0x00000207 +#define NV40TCL_STENCIL_FRONT_FUNC_REF 0x00000334 +#define NV40TCL_STENCIL_FRONT_FUNC_MASK 0x00000338 +#define NV40TCL_STENCIL_FRONT_OP_FAIL 0x0000033c +#define NV40TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL 0x00000340 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS 0x00000344 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_BACK_ENABLE 0x00000348 +#define NV40TCL_STENCIL_BACK_MASK 0x0000034c +#define NV40TCL_STENCIL_BACK_FUNC_FUNC 0x00000350 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define NV40TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 +#define NV40TCL_STENCIL_BACK_FUNC_REF 0x00000354 +#define NV40TCL_STENCIL_BACK_FUNC_MASK 0x00000358 +#define NV40TCL_STENCIL_BACK_OP_FAIL 0x0000035c +#define NV40TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL 0x00000360 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV40TCL_STENCIL_BACK_OP_ZPASS 0x00000364 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define 
NV40TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV40TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV40TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV40TCL_SHADE_MODEL 0x00000368 +#define NV40TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV40TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV40TCL_MRT_COLOR_MASK 0x00000370 +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_A (1 << 4) +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_R (1 << 5) +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_G (1 << 6) +#define NV40TCL_MRT_COLOR_MASK_BUFFER1_B (1 << 7) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_A (1 << 8) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_R (1 << 9) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_G (1 << 10) +#define NV40TCL_MRT_COLOR_MASK_BUFFER2_B (1 << 11) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_A (1 << 12) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_R (1 << 13) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_G (1 << 14) +#define NV40TCL_MRT_COLOR_MASK_BUFFER3_B (1 << 15) +#define NV40TCL_COLOR_LOGIC_OP_ENABLE 0x00000374 +#define NV40TCL_COLOR_LOGIC_OP 0x00000378 +#define NV40TCL_COLOR_LOGIC_OP_CLEAR 0x00001500 +#define NV40TCL_COLOR_LOGIC_OP_AND 0x00001501 +#define NV40TCL_COLOR_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV40TCL_COLOR_LOGIC_OP_COPY 0x00001503 +#define NV40TCL_COLOR_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV40TCL_COLOR_LOGIC_OP_NOOP 0x00001505 +#define NV40TCL_COLOR_LOGIC_OP_XOR 0x00001506 +#define NV40TCL_COLOR_LOGIC_OP_OR 0x00001507 +#define NV40TCL_COLOR_LOGIC_OP_NOR 0x00001508 +#define NV40TCL_COLOR_LOGIC_OP_EQUIV 0x00001509 +#define NV40TCL_COLOR_LOGIC_OP_INVERT 0x0000150a +#define NV40TCL_COLOR_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV40TCL_COLOR_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV40TCL_COLOR_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV40TCL_COLOR_LOGIC_OP_NAND 0x0000150e +#define NV40TCL_COLOR_LOGIC_OP_SET 0x0000150f +#define NV40TCL_DEPTH_RANGE_NEAR 0x00000394 +#define NV40TCL_DEPTH_RANGE_FAR 0x00000398 +#define NV40TCL_LINE_WIDTH 0x000003b8 +#define NV40TCL_LINE_SMOOTH_ENABLE 0x000003bc +#define NV40TCL_UNK03C0(x) (0x000003c0+((x)*4)) +#define NV40TCL_UNK03C0__SIZE 0x00000010 +#define NV40TCL_UNK0400(x) (0x00000400+((x)*4)) +#define NV40TCL_UNK0400__SIZE 0x00000010 +#define NV40TCL_UNK0440(x) (0x00000440+((x)*4)) +#define NV40TCL_UNK0440__SIZE 0x00000020 +#define NV40TCL_SCISSOR_HORIZ 0x000008c0 +#define NV40TCL_SCISSOR_HORIZ_X_SHIFT 0 +#define NV40TCL_SCISSOR_HORIZ_X_MASK 0x0000ffff +#define NV40TCL_SCISSOR_HORIZ_W_SHIFT 16 +#define NV40TCL_SCISSOR_HORIZ_W_MASK 0xffff0000 +#define NV40TCL_SCISSOR_VERT 0x000008c4 +#define NV40TCL_SCISSOR_VERT_Y_SHIFT 0 +#define NV40TCL_SCISSOR_VERT_Y_MASK 0x0000ffff +#define NV40TCL_SCISSOR_VERT_H_SHIFT 16 +#define NV40TCL_SCISSOR_VERT_H_MASK 0xffff0000 +#define NV40TCL_FOG_MODE 0x000008cc +#define NV40TCL_FOG_EQUATION_CONSTANT 0x000008d0 +#define NV40TCL_FOG_EQUATION_LINEAR 0x000008d4 +#define NV40TCL_FOG_EQUATION_QUADRATIC 0x000008d8 +#define NV40TCL_FP_ADDRESS 0x000008e4 +#define NV40TCL_FP_ADDRESS_OFFSET_SHIFT 8 +#define NV40TCL_FP_ADDRESS_OFFSET_MASK 0xffffff00 +#define NV40TCL_FP_ADDRESS_DMA1 (1 << 1) +#define NV40TCL_FP_ADDRESS_DMA0 (1 << 0) +#define NV40TCL_VIEWPORT_HORIZ 0x00000a00 +#define NV40TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV40TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV40TCL_VIEWPORT_HORIZ_X_SHIFT 0 
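
Note: the NV40 scissor and viewport window registers use the same packed 16:16 x/width (or y/height) layout as their NV34 counterparts earlier in this header. A small sketch, assuming a hypothetical helper name, of how such a word could be assembled from the shift/mask defines above:

    #include <stdint.h>

    /* Hypothetical helper: pack an x/width pair into one 16:16 register
     * word, e.g. for NV40TCL_SCISSOR_HORIZ or NV40TCL_VIEWPORT_HORIZ. */
    static inline uint32_t
    nv40_pack_horiz(uint32_t x, uint32_t w)
    {
       return ((x << NV40TCL_SCISSOR_HORIZ_X_SHIFT) & NV40TCL_SCISSOR_HORIZ_X_MASK) |
              ((w << NV40TCL_SCISSOR_HORIZ_W_SHIFT) & NV40TCL_SCISSOR_HORIZ_W_MASK);
    }

    /* e.g. nv40_pack_horiz(0, 4096) == 0x10000000 */
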
+#define NV40TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV40TCL_VIEWPORT_VERT 0x00000a04 +#define NV40TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV40TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV40TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV40TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV40TCL_VIEWPORT_TRANSLATE_X 0x00000a20 +#define NV40TCL_VIEWPORT_TRANSLATE_Y 0x00000a24 +#define NV40TCL_VIEWPORT_TRANSLATE_Z 0x00000a28 +#define NV40TCL_VIEWPORT_TRANSLATE_W 0x00000a2c +#define NV40TCL_VIEWPORT_SCALE_X 0x00000a30 +#define NV40TCL_VIEWPORT_SCALE_Y 0x00000a34 +#define NV40TCL_VIEWPORT_SCALE_Z 0x00000a38 +#define NV40TCL_VIEWPORT_SCALE_W 0x00000a3c +#define NV40TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000a60 +#define NV40TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000a64 +#define NV40TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000a68 +#define NV40TCL_DEPTH_FUNC 0x00000a6c +#define NV40TCL_DEPTH_FUNC_NEVER 0x00000200 +#define NV40TCL_DEPTH_FUNC_LESS 0x00000201 +#define NV40TCL_DEPTH_FUNC_EQUAL 0x00000202 +#define NV40TCL_DEPTH_FUNC_LEQUAL 0x00000203 +#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV40TCL_DEPTH_FUNC_GREATER 0x00000204 +#define NV40TCL_DEPTH_FUNC_NOTEQUAL 0x00000205 +#define NV40TCL_DEPTH_FUNC_GEQUAL 0x00000206 +#define NV40TCL_DEPTH_FUNC_ALWAYS 0x00000207 +#define NV40TCL_DEPTH_WRITE_ENABLE 0x00000a70 +#define NV40TCL_DEPTH_TEST_ENABLE 0x00000a74 +#define NV40TCL_POLYGON_OFFSET_FACTOR 0x00000a78 +#define NV40TCL_POLYGON_OFFSET_UNITS 0x00000a7c +#define NV40TCL_VTX_ATTR_3I_XY(x) (0x00000a80+((x)*8)) +#define NV40TCL_VTX_ATTR_3I_XY__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3I_XY_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_3I_XY_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_3I_XY_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_3I_XY_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_3I_Z(x) (0x00000a84+((x)*8)) +#define NV40TCL_VTX_ATTR_3I_Z__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3I_Z_Z_SHIFT 0 +#define NV40TCL_VTX_ATTR_3I_Z_Z_MASK 0x0000ffff +#define NV40TCL_UNK0B40(x) (0x00000b40+((x)*4)) +#define NV40TCL_UNK0B40__SIZE 0x00000008 +#define NV40TCL_VP_UPLOAD_INST(x) (0x00000b80+((x)*4)) +#define NV40TCL_VP_UPLOAD_INST__SIZE 0x00000004 +#define NV40TCL_CLIP_PLANE_ENABLE 0x00001478 +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE0 (1 << 1) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE1 (1 << 5) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE2 (1 << 9) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE3 (1 << 13) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE4 (1 << 17) +#define NV40TCL_CLIP_PLANE_ENABLE_PLANE5 (1 << 21) +#define NV40TCL_POLYGON_STIPPLE_ENABLE 0x0000147c +#define NV40TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001480+((x)*4)) +#define NV40TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV40TCL_VTX_ATTR_3F_X(x) (0x00001500+((x)*16)) +#define NV40TCL_VTX_ATTR_3F_X__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3F_Y(x) (0x00001504+((x)*16)) +#define NV40TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_3F_Z(x) (0x00001508+((x)*16)) +#define NV40TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 +#define NV40TCL_VTXBUF_ADDRESS(x) (0x00001680+((x)*4)) +#define NV40TCL_VTXBUF_ADDRESS__SIZE 0x00000010 +#define NV40TCL_VTXBUF_ADDRESS_DMA1 (1 << 31) +#define NV40TCL_VTXBUF_ADDRESS_OFFSET_SHIFT 0 +#define NV40TCL_VTXBUF_ADDRESS_OFFSET_MASK 0x0fffffff +#define NV40TCL_VTX_CACHE_INVALIDATE 0x00001714 +#define NV40TCL_VTXFMT(x) (0x00001740+((x)*4)) +#define NV40TCL_VTXFMT__SIZE 0x00000010 +#define NV40TCL_VTXFMT_TYPE_SHIFT 0 +#define NV40TCL_VTXFMT_TYPE_MASK 0x0000000f +#define NV40TCL_VTXFMT_TYPE_FLOAT 0x00000002 +#define NV40TCL_VTXFMT_TYPE_UBYTE 0x00000004 +#define 
NV40TCL_VTXFMT_TYPE_USHORT 0x00000005 +#define NV40TCL_VTXFMT_SIZE_SHIFT 4 +#define NV40TCL_VTXFMT_SIZE_MASK 0x000000f0 +#define NV40TCL_VTXFMT_STRIDE_SHIFT 8 +#define NV40TCL_VTXFMT_STRIDE_MASK 0x0000ff00 +#define NV40TCL_QUERY_RESET 0x000017c8 +#define NV40TCL_QUERY_UNK17CC 0x000017cc +#define NV40TCL_QUERY_GET 0x00001800 +#define NV40TCL_QUERY_GET_UNK24_SHIFT 24 +#define NV40TCL_QUERY_GET_UNK24_MASK 0xff000000 +#define NV40TCL_QUERY_GET_OFFSET_SHIFT 0 +#define NV40TCL_QUERY_GET_OFFSET_MASK 0x00ffffff +#define NV40TCL_BEGIN_END 0x00001808 +#define NV40TCL_BEGIN_END_STOP 0x00000000 +#define NV40TCL_BEGIN_END_POINTS 0x00000001 +#define NV40TCL_BEGIN_END_LINES 0x00000002 +#define NV40TCL_BEGIN_END_LINE_LOOP 0x00000003 +#define NV40TCL_BEGIN_END_LINE_STRIP 0x00000004 +#define NV40TCL_BEGIN_END_TRIANGLES 0x00000005 +#define NV40TCL_BEGIN_END_TRIANGLE_STRIP 0x00000006 +#define NV40TCL_BEGIN_END_TRIANGLE_FAN 0x00000007 +#define NV40TCL_BEGIN_END_QUADS 0x00000008 +#define NV40TCL_BEGIN_END_QUAD_STRIP 0x00000009 +#define NV40TCL_BEGIN_END_POLYGON 0x0000000a +#define NV40TCL_VB_ELEMENT_U16 0x0000180c +#define NV40TCL_VB_ELEMENT_U16_1_SHIFT 16 +#define NV40TCL_VB_ELEMENT_U16_1_MASK 0xffff0000 +#define NV40TCL_VB_ELEMENT_U16_0_SHIFT 0 +#define NV40TCL_VB_ELEMENT_U16_0_MASK 0x0000ffff +#define NV40TCL_VB_ELEMENT_U32 0x00001810 +#define NV40TCL_VB_VERTEX_BATCH 0x00001814 +#define NV40TCL_VB_VERTEX_BATCH_COUNT_SHIFT 24 +#define NV40TCL_VB_VERTEX_BATCH_COUNT_MASK 0xff000000 +#define NV40TCL_VB_VERTEX_BATCH_START_SHIFT 0 +#define NV40TCL_VB_VERTEX_BATCH_START_MASK 0x00ffffff +#define NV40TCL_VERTEX_DATA 0x00001818 +#define NV40TCL_IDXBUF_ADDRESS 0x0000181c +#define NV40TCL_IDXBUF_FORMAT 0x00001820 +#define NV40TCL_IDXBUF_FORMAT_TYPE_SHIFT 4 +#define NV40TCL_IDXBUF_FORMAT_TYPE_MASK 0x000000f0 +#define NV40TCL_IDXBUF_FORMAT_TYPE_U32 0x00000000 +#define NV40TCL_IDXBUF_FORMAT_TYPE_U16 0x00000010 +#define NV40TCL_IDXBUF_FORMAT_DMA1 (1 << 0) +#define NV40TCL_VB_INDEX_BATCH 0x00001824 +#define NV40TCL_VB_INDEX_BATCH_COUNT_SHIFT 24 +#define NV40TCL_VB_INDEX_BATCH_COUNT_MASK 0xff000000 +#define NV40TCL_VB_INDEX_BATCH_START_SHIFT 0 +#define NV40TCL_VB_INDEX_BATCH_START_MASK 0x00ffffff +#define NV40TCL_POLYGON_MODE_FRONT 0x00001828 +#define NV40TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV40TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV40TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV40TCL_POLYGON_MODE_BACK 0x0000182c +#define NV40TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV40TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV40TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV40TCL_CULL_FACE 0x00001830 +#define NV40TCL_CULL_FACE_FRONT 0x00000404 +#define NV40TCL_CULL_FACE_BACK 0x00000405 +#define NV40TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV40TCL_FRONT_FACE 0x00001834 +#define NV40TCL_FRONT_FACE_CW 0x00000900 +#define NV40TCL_FRONT_FACE_CCW 0x00000901 +#define NV40TCL_POLYGON_SMOOTH_ENABLE 0x00001838 +#define NV40TCL_CULL_FACE_ENABLE 0x0000183c +#define NV40TCL_TEX_SIZE1(x) (0x00001840+((x)*4)) +#define NV40TCL_TEX_SIZE1__SIZE 0x00000008 +#define NV40TCL_TEX_SIZE1_DEPTH_SHIFT 20 +#define NV40TCL_TEX_SIZE1_DEPTH_MASK 0xfff00000 +#define NV40TCL_TEX_SIZE1_PITCH_SHIFT 0 +#define NV40TCL_TEX_SIZE1_PITCH_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_2F_X(x) (0x00001880+((x)*8)) +#define NV40TCL_VTX_ATTR_2F_X__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_2F_Y(x) (0x00001884+((x)*8)) +#define NV40TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_2I(x) (0x00001900+((x)*4)) +#define 
NV40TCL_VTX_ATTR_2I__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_2I_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_2I_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_4UB(x) (0x00001940+((x)*4)) +#define NV40TCL_VTX_ATTR_4UB__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4UB_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_4UB_X_MASK 0x000000ff +#define NV40TCL_VTX_ATTR_4UB_Y_SHIFT 8 +#define NV40TCL_VTX_ATTR_4UB_Y_MASK 0x0000ff00 +#define NV40TCL_VTX_ATTR_4UB_Z_SHIFT 16 +#define NV40TCL_VTX_ATTR_4UB_Z_MASK 0x00ff0000 +#define NV40TCL_VTX_ATTR_4UB_W_SHIFT 24 +#define NV40TCL_VTX_ATTR_4UB_W_MASK 0xff000000 +#define NV40TCL_VTX_ATTR_4I_XY(x) (0x00001980+((x)*8)) +#define NV40TCL_VTX_ATTR_4I_XY__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4I_XY_X_SHIFT 0 +#define NV40TCL_VTX_ATTR_4I_XY_X_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_4I_XY_Y_SHIFT 16 +#define NV40TCL_VTX_ATTR_4I_XY_Y_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_4I_ZW(x) (0x00001984+((x)*8)) +#define NV40TCL_VTX_ATTR_4I_ZW__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4I_ZW_Z_SHIFT 0 +#define NV40TCL_VTX_ATTR_4I_ZW_Z_MASK 0x0000ffff +#define NV40TCL_VTX_ATTR_4I_ZW_W_SHIFT 16 +#define NV40TCL_VTX_ATTR_4I_ZW_W_MASK 0xffff0000 +#define NV40TCL_TEX_OFFSET(x) (0x00001a00+((x)*32)) +#define NV40TCL_TEX_OFFSET__SIZE 0x00000010 +#define NV40TCL_TEX_FORMAT(x) (0x00001a04+((x)*32)) +#define NV40TCL_TEX_FORMAT__SIZE 0x00000010 +#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT 16 +#define NV40TCL_TEX_FORMAT_MIPMAP_COUNT_MASK 0x000f0000 +#define NV40TCL_TEX_FORMAT_RECT (1 << 14) +#define NV40TCL_TEX_FORMAT_LINEAR (1 << 13) +#define NV40TCL_TEX_FORMAT_FORMAT_SHIFT 8 +#define NV40TCL_TEX_FORMAT_FORMAT_MASK 0x00001f00 +#define NV40TCL_TEX_FORMAT_FORMAT_L8 0x00000100 +#define NV40TCL_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200 +#define NV40TCL_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000300 +#define NV40TCL_TEX_FORMAT_FORMAT_R5G6B5 0x00000400 +#define NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000500 +#define NV40TCL_TEX_FORMAT_FORMAT_DXT1 0x00000600 +#define NV40TCL_TEX_FORMAT_FORMAT_DXT3 0x00000700 +#define NV40TCL_TEX_FORMAT_FORMAT_DXT5 0x00000800 +#define NV40TCL_TEX_FORMAT_FORMAT_A8L8 0x00000b00 +#define NV40TCL_TEX_FORMAT_FORMAT_Z24 0x00001000 +#define NV40TCL_TEX_FORMAT_FORMAT_Z16 0x00001200 +#define NV40TCL_TEX_FORMAT_FORMAT_A16 0x00001400 +#define NV40TCL_TEX_FORMAT_FORMAT_A16L16 0x00001500 +#define NV40TCL_TEX_FORMAT_FORMAT_HILO8 0x00001800 +#define NV40TCL_TEX_FORMAT_FORMAT_RGBA16F 0x00001a00 +#define NV40TCL_TEX_FORMAT_FORMAT_RGBA32F 0x00001b00 +#define NV40TCL_TEX_FORMAT_DIMS_SHIFT 4 +#define NV40TCL_TEX_FORMAT_DIMS_MASK 0x000000f0 +#define NV40TCL_TEX_FORMAT_DIMS_1D 0x00000010 +#define NV40TCL_TEX_FORMAT_DIMS_2D 0x00000020 +#define NV40TCL_TEX_FORMAT_DIMS_3D 0x00000030 +#define NV40TCL_TEX_FORMAT_NO_BORDER (1 << 3) +#define NV40TCL_TEX_FORMAT_CUBIC (1 << 2) +#define NV40TCL_TEX_FORMAT_DMA1 (1 << 1) +#define NV40TCL_TEX_FORMAT_DMA0 (1 << 0) +#define NV40TCL_TEX_WRAP(x) (0x00001a08+((x)*32)) +#define NV40TCL_TEX_WRAP__SIZE 0x00000010 +#define NV40TCL_TEX_WRAP_S_SHIFT 0 +#define NV40TCL_TEX_WRAP_S_MASK 0x000000ff +#define NV40TCL_TEX_WRAP_S_REPEAT 0x00000001 +#define NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002 +#define NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003 +#define NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004 +#define NV40TCL_TEX_WRAP_S_CLAMP 0x00000005 +#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE 0x00000006 +#define NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER 0x00000007 +#define 
NV40TCL_TEX_WRAP_S_MIRROR_CLAMP 0x00000008 +#define NV40TCL_TEX_WRAP_T_SHIFT 8 +#define NV40TCL_TEX_WRAP_T_MASK 0x00000f00 +#define NV40TCL_TEX_WRAP_T_REPEAT 0x00000100 +#define NV40TCL_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200 +#define NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300 +#define NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400 +#define NV40TCL_TEX_WRAP_T_CLAMP 0x00000500 +#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_EDGE 0x00000600 +#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP_TO_BORDER 0x00000700 +#define NV40TCL_TEX_WRAP_T_MIRROR_CLAMP 0x00000800 +#define NV40TCL_TEX_WRAP_EXPAND_NORMAL_SHIFT 12 +#define NV40TCL_TEX_WRAP_EXPAND_NORMAL_MASK 0x0000f000 +#define NV40TCL_TEX_WRAP_R_SHIFT 16 +#define NV40TCL_TEX_WRAP_R_MASK 0x00ff0000 +#define NV40TCL_TEX_WRAP_R_REPEAT 0x00010000 +#define NV40TCL_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000 +#define NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000 +#define NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000 +#define NV40TCL_TEX_WRAP_R_CLAMP 0x00050000 +#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_EDGE 0x00060000 +#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP_TO_BORDER 0x00070000 +#define NV40TCL_TEX_WRAP_R_MIRROR_CLAMP 0x00080000 +#define NV40TCL_TEX_WRAP_RCOMP_SHIFT 28 +#define NV40TCL_TEX_WRAP_RCOMP_MASK 0xf0000000 +#define NV40TCL_TEX_WRAP_RCOMP_NEVER 0x00000000 +#define NV40TCL_TEX_WRAP_RCOMP_GREATER 0x10000000 +#define NV40TCL_TEX_WRAP_RCOMP_EQUAL 0x20000000 +#define NV40TCL_TEX_WRAP_RCOMP_GEQUAL 0x30000000 +#define NV40TCL_TEX_WRAP_RCOMP_LESS 0x40000000 +#define NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL 0x50000000 +#define NV40TCL_TEX_WRAP_RCOMP_LEQUAL 0x60000000 +#define NV40TCL_TEX_WRAP_RCOMP_ALWAYS 0x70000000 +#define NV40TCL_TEX_ENABLE(x) (0x00001a0c+((x)*32)) +#define NV40TCL_TEX_ENABLE__SIZE 0x00000010 +#define NV40TCL_TEX_ENABLE_ENABLE (1 << 31) +#define NV40TCL_TEX_ENABLE_MIPMAP_MIN_LOD_SHIFT 27 +#define NV40TCL_TEX_ENABLE_MIPMAP_MIN_LOD_MASK 0x38000000 +#define NV40TCL_TEX_ENABLE_MIPMAP_MAX_LOD_SHIFT 15 +#define NV40TCL_TEX_ENABLE_MIPMAP_MAX_LOD_MASK 0x00038000 +#define NV40TCL_TEX_ENABLE_ANISO_SHIFT 4 +#define NV40TCL_TEX_ENABLE_ANISO_MASK 0x000000f0 +#define NV40TCL_TEX_ENABLE_ANISO_NONE 0x00000000 +#define NV40TCL_TEX_ENABLE_ANISO_2X 0x00000010 +#define NV40TCL_TEX_ENABLE_ANISO_4X 0x00000020 +#define NV40TCL_TEX_ENABLE_ANISO_6X 0x00000030 +#define NV40TCL_TEX_ENABLE_ANISO_8X 0x00000040 +#define NV40TCL_TEX_ENABLE_ANISO_10X 0x00000050 +#define NV40TCL_TEX_ENABLE_ANISO_12X 0x00000060 +#define NV40TCL_TEX_ENABLE_ANISO_16X 0x00000070 +#define NV40TCL_TEX_SWIZZLE(x) (0x00001a10+((x)*32)) +#define NV40TCL_TEX_SWIZZLE__SIZE 0x00000010 +#define NV40TCL_TEX_SWIZZLE_S0_X_SHIFT 14 +#define NV40TCL_TEX_SWIZZLE_S0_X_MASK 0x0000c000 +#define NV40TCL_TEX_SWIZZLE_S0_X_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_X_ONE 0x00004000 +#define NV40TCL_TEX_SWIZZLE_S0_X_S1 0x00008000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT 12 +#define NV40TCL_TEX_SWIZZLE_S0_Y_MASK 0x00003000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_ONE 0x00001000 +#define NV40TCL_TEX_SWIZZLE_S0_Y_S1 0x00002000 +#define NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT 10 +#define NV40TCL_TEX_SWIZZLE_S0_Z_MASK 0x00000c00 +#define NV40TCL_TEX_SWIZZLE_S0_Z_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_Z_ONE 0x00000400 +#define NV40TCL_TEX_SWIZZLE_S0_Z_S1 0x00000800 +#define NV40TCL_TEX_SWIZZLE_S0_W_SHIFT 8 +#define NV40TCL_TEX_SWIZZLE_S0_W_MASK 0x00000300 +#define NV40TCL_TEX_SWIZZLE_S0_W_ZERO 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S0_W_ONE 0x00000100 +#define NV40TCL_TEX_SWIZZLE_S0_W_S1 
0x00000200 +#define NV40TCL_TEX_SWIZZLE_S1_X_SHIFT 6 +#define NV40TCL_TEX_SWIZZLE_S1_X_MASK 0x000000c0 +#define NV40TCL_TEX_SWIZZLE_S1_X_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_X_Z 0x00000040 +#define NV40TCL_TEX_SWIZZLE_S1_X_Y 0x00000080 +#define NV40TCL_TEX_SWIZZLE_S1_X_X 0x000000c0 +#define NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT 4 +#define NV40TCL_TEX_SWIZZLE_S1_Y_MASK 0x00000030 +#define NV40TCL_TEX_SWIZZLE_S1_Y_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_Y_Z 0x00000010 +#define NV40TCL_TEX_SWIZZLE_S1_Y_Y 0x00000020 +#define NV40TCL_TEX_SWIZZLE_S1_Y_X 0x00000030 +#define NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT 2 +#define NV40TCL_TEX_SWIZZLE_S1_Z_MASK 0x0000000c +#define NV40TCL_TEX_SWIZZLE_S1_Z_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_Z_Z 0x00000004 +#define NV40TCL_TEX_SWIZZLE_S1_Z_Y 0x00000008 +#define NV40TCL_TEX_SWIZZLE_S1_Z_X 0x0000000c +#define NV40TCL_TEX_SWIZZLE_S1_W_SHIFT 0 +#define NV40TCL_TEX_SWIZZLE_S1_W_MASK 0x00000003 +#define NV40TCL_TEX_SWIZZLE_S1_W_W 0x00000000 +#define NV40TCL_TEX_SWIZZLE_S1_W_Z 0x00000001 +#define NV40TCL_TEX_SWIZZLE_S1_W_Y 0x00000002 +#define NV40TCL_TEX_SWIZZLE_S1_W_X 0x00000003 +#define NV40TCL_TEX_FILTER(x) (0x00001a14+((x)*32)) +#define NV40TCL_TEX_FILTER__SIZE 0x00000010 +#define NV40TCL_TEX_FILTER_SIGNED_ALPHA (1 << 31) +#define NV40TCL_TEX_FILTER_SIGNED_RED (1 << 30) +#define NV40TCL_TEX_FILTER_SIGNED_GREEN (1 << 29) +#define NV40TCL_TEX_FILTER_SIGNED_BLUE (1 << 28) +#define NV40TCL_TEX_FILTER_MIN_SHIFT 16 +#define NV40TCL_TEX_FILTER_MIN_MASK 0x000f0000 +#define NV40TCL_TEX_FILTER_MIN_NEAREST 0x00010000 +#define NV40TCL_TEX_FILTER_MIN_LINEAR 0x00020000 +#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST 0x00030000 +#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST 0x00040000 +#define NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR 0x00050000 +#define NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR 0x00060000 +#define NV40TCL_TEX_FILTER_MAG_SHIFT 24 +#define NV40TCL_TEX_FILTER_MAG_MASK 0x0f000000 +#define NV40TCL_TEX_FILTER_MAG_NEAREST 0x01000000 +#define NV40TCL_TEX_FILTER_MAG_LINEAR 0x02000000 +#define NV40TCL_TEX_SIZE0(x) (0x00001a18+((x)*32)) +#define NV40TCL_TEX_SIZE0__SIZE 0x00000010 +#define NV40TCL_TEX_SIZE0_H_SHIFT 0 +#define NV40TCL_TEX_SIZE0_H_MASK 0x0000ffff +#define NV40TCL_TEX_SIZE0_W_SHIFT 16 +#define NV40TCL_TEX_SIZE0_W_MASK 0xffff0000 +#define NV40TCL_TEX_BORDER_COLOR(x) (0x00001a1c+((x)*32)) +#define NV40TCL_TEX_BORDER_COLOR__SIZE 0x00000010 +#define NV40TCL_TEX_BORDER_COLOR_B_SHIFT 0 +#define NV40TCL_TEX_BORDER_COLOR_B_MASK 0x000000ff +#define NV40TCL_TEX_BORDER_COLOR_G_SHIFT 8 +#define NV40TCL_TEX_BORDER_COLOR_G_MASK 0x0000ff00 +#define NV40TCL_TEX_BORDER_COLOR_R_SHIFT 16 +#define NV40TCL_TEX_BORDER_COLOR_R_MASK 0x00ff0000 +#define NV40TCL_TEX_BORDER_COLOR_A_SHIFT 24 +#define NV40TCL_TEX_BORDER_COLOR_A_MASK 0xff000000 +#define NV40TCL_VTX_ATTR_4F_X(x) (0x00001c00+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4F_Y(x) (0x00001c04+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4F_Z(x) (0x00001c08+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV40TCL_VTX_ATTR_4F_W(x) (0x00001c0c+((x)*16)) +#define NV40TCL_VTX_ATTR_4F_W__SIZE 0x00000010 +#define NV40TCL_FP_CONTROL 0x00001d60 +#define NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT 24 +#define NV40TCL_FP_CONTROL_TEMP_COUNT_MASK 0xff000000 +#define NV40TCL_FP_CONTROL_KIL (1 << 7) +#define NV40TCL_MULTISAMPLE_CONTROL 0x00001d7c +#define NV40TCL_CLEAR_VALUE_DEPTH 0x00001d8c +#define NV40TCL_CLEAR_VALUE_COLOR 
0x00001d90 +#define NV40TCL_CLEAR_BUFFERS 0x00001d94 +#define NV40TCL_CLEAR_BUFFERS_COLOR_A (1 << 7) +#define NV40TCL_CLEAR_BUFFERS_COLOR_B (1 << 6) +#define NV40TCL_CLEAR_BUFFERS_COLOR_G (1 << 5) +#define NV40TCL_CLEAR_BUFFERS_COLOR_R (1 << 4) +#define NV40TCL_CLEAR_BUFFERS_STENCIL (1 << 1) +#define NV40TCL_CLEAR_BUFFERS_DEPTH (1 << 0) +#define NV40TCL_LINE_STIPPLE_ENABLE 0x00001db4 +#define NV40TCL_LINE_STIPPLE_PATTERN 0x00001db8 +#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_SHIFT 0 +#define NV40TCL_LINE_STIPPLE_PATTERN_FACTOR_MASK 0x0000ffff +#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_SHIFT 16 +#define NV40TCL_LINE_STIPPLE_PATTERN_PATTERN_MASK 0xffff0000 +#define NV40TCL_VTX_ATTR_1F(x) (0x00001e40+((x)*4)) +#define NV40TCL_VTX_ATTR_1F__SIZE 0x00000010 +#define NV40TCL_VP_UPLOAD_FROM_ID 0x00001e9c +#define NV40TCL_VP_START_FROM_ID 0x00001ea0 +#define NV40TCL_POINT_SIZE 0x00001ee0 +#define NV40TCL_POINT_SPRITE 0x00001ee8 +#define NV40TCL_VP_UPLOAD_CONST_ID 0x00001efc +#define NV40TCL_VP_UPLOAD_CONST_X(x) (0x00001f00+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_X__SIZE 0x00000004 +#define NV40TCL_VP_UPLOAD_CONST_Y(x) (0x00001f04+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_Y__SIZE 0x00000004 +#define NV40TCL_VP_UPLOAD_CONST_Z(x) (0x00001f08+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_Z__SIZE 0x00000004 +#define NV40TCL_VP_UPLOAD_CONST_W(x) (0x00001f0c+((x)*16)) +#define NV40TCL_VP_UPLOAD_CONST_W__SIZE 0x00000004 +#define NV40TCL_TEX_CACHE_CTL 0x00001fd8 +#define NV40TCL_VP_ATTRIB_EN 0x00001ff0 +#define NV40TCL_VP_RESULT_EN 0x00001ff4 + + +#define NV44TCL 0x00004497 + + + +#define NV50_2D 0x0000502d + +#define NV50_2D_NOP 0x00000100 +#define NV50_2D_NOTIFY 0x00000104 +#define NV50_2D_DMA_NOTIFY 0x00000180 +#define NV50_2D_DMA_IN_MEMORY0 0x00000184 +#define NV50_2D_DMA_IN_MEMORY1 0x00000188 +#define NV50_2D_DMA_IN_MEMORY2 0x0000018c +#define NV50_2D_DST_FORMAT 0x00000200 +#define NV50_2D_DST_FORMAT_32BPP 0x000000cf +#define NV50_2D_DST_FORMAT_24BPP 0x000000e6 +#define NV50_2D_DST_FORMAT_16BPP 0x000000e8 +#define NV50_2D_DST_FORMAT_8BPP 0x000000f3 +#define NV50_2D_DST_FORMAT_15BPP 0x000000f8 +#define NV50_2D_DST_PITCH 0x00000214 +#define NV50_2D_DST_WIDTH 0x00000218 +#define NV50_2D_DST_HEIGHT 0x0000021c +#define NV50_2D_DST_ADDRESS_HIGH 0x00000220 +#define NV50_2D_DST_ADDRESS_LOW 0x00000224 +#define NV50_2D_SRC_FORMAT 0x00000230 +#define NV50_2D_SRC_FORMAT_32BPP 0x000000cf +#define NV50_2D_SRC_FORMAT_24BPP 0x000000e6 +#define NV50_2D_SRC_FORMAT_16BPP 0x000000e8 +#define NV50_2D_SRC_FORMAT_8BPP 0x000000f3 +#define NV50_2D_SRC_FORMAT_15BPP 0x000000f8 +#define NV50_2D_SRC_PITCH 0x00000244 +#define NV50_2D_SRC_WIDTH 0x00000248 +#define NV50_2D_SRC_HEIGHT 0x0000024c +#define NV50_2D_SRC_ADDRESS_HIGH 0x00000250 +#define NV50_2D_SRC_ADDRESS_LOW 0x00000254 +#define NV50_2D_CLIP_X 0x00000280 +#define NV50_2D_CLIP_Y 0x00000284 +#define NV50_2D_CLIP_Z 0x00000288 +#define NV50_2D_CLIP_W 0x0000028c +#define NV50_2D_ROP 0x000002a0 +#define NV50_2D_OPERATION 0x000002ac +#define NV50_2D_OPERATION_SRCCOPY_AND 0x00000000 +#define NV50_2D_OPERATION_ROP_AND 0x00000001 +#define NV50_2D_OPERATION_BLEND_AND 0x00000002 +#define NV50_2D_OPERATION_SRCCOPY 0x00000003 +#define NV50_2D_OPERATION_SRCCOPY_PREMULT 0x00000004 +#define NV50_2D_OPERATION_BLEND_PREMULT 0x00000005 +#define NV50_2D_PATTERN_FORMAT 0x000002e8 +#define NV50_2D_PATTERN_FORMAT_16BPP 0x00000000 +#define NV50_2D_PATTERN_FORMAT_15BPP 0x00000001 +#define NV50_2D_PATTERN_FORMAT_32BPP 0x00000002 +#define NV50_2D_PATTERN_FORMAT_8BPP 0x00000003 +#define 
NV50_2D_PATTERN_COLOR(x) (0x000002f0+((x)*4)) +#define NV50_2D_PATTERN_COLOR__SIZE 0x00000002 +#define NV50_2D_PATTERN_BITMAP(x) (0x000002f8+((x)*4)) +#define NV50_2D_PATTERN_BITMAP__SIZE 0x00000002 +#define NV50_2D_RECT_FORMAT 0x00000584 +#define NV50_2D_RECT_FORMAT_32BPP 0x000000cf +#define NV50_2D_RECT_FORMAT_24BPP 0x000000e6 +#define NV50_2D_RECT_FORMAT_16BPP 0x000000e8 +#define NV50_2D_RECT_FORMAT_8BPP 0x000000f3 +#define NV50_2D_RECT_FORMAT_15BPP 0x000000f8 +#define NV50_2D_RECT_COLOR 0x00000588 +#define NV50_2D_RECT_X1 0x00000600 +#define NV50_2D_RECT_Y1 0x00000604 +#define NV50_2D_RECT_X2 0x00000608 +#define NV50_2D_RECT_Y2 0x0000060c +#define NV50_2D_SIFC_UNK0800 0x00000800 +#define NV50_2D_SIFC_FORMAT 0x00000804 +#define NV50_2D_SIFC_FORMAT_32BPP 0x000000cf +#define NV50_2D_SIFC_FORMAT_24BPP 0x000000e6 +#define NV50_2D_SIFC_FORMAT_16BPP 0x000000e8 +#define NV50_2D_SIFC_FORMAT_8BPP 0x000000f3 +#define NV50_2D_SIFC_FORMAT_15BPP 0x000000f8 +#define NV50_2D_SIFC_WIDTH 0x00000838 +#define NV50_2D_SIFC_HEIGHT 0x0000083c +#define NV50_2D_SIFC_SCALE_UNK0840 0x00000840 +#define NV50_2D_SIFC_SCALE_UNK0844 0x00000844 +#define NV50_2D_SIFC_SCALE_UNK0848 0x00000848 +#define NV50_2D_SIFC_SCALE_UNK084C 0x0000084c +#define NV50_2D_SIFC_UNK0850 0x00000850 +#define NV50_2D_SIFC_DST_X 0x00000854 +#define NV50_2D_SIFC_UNK0858 0x00000858 +#define NV50_2D_SIFC_DST_Y 0x0000085c +#define NV50_2D_SIFC_DATA 0x00000860 +#define NV50_2D_BLIT_DST_X 0x000008b0 +#define NV50_2D_BLIT_DST_Y 0x000008b4 +#define NV50_2D_BLIT_DST_W 0x000008b8 +#define NV50_2D_BLIT_DST_H 0x000008bc +#define NV50_2D_BLIT_SRC_X 0x000008d4 +#define NV50_2D_BLIT_SRC_Y 0x000008dc + + +#define NV50_MEMORY_TO_MEMORY_FORMAT 0x00005039 + +#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH 0x00000238 +#define NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_OUT_HIGH 0x0000023c + + +#define NV50TCL 0x00005097 + +#define NV50TCL_NOP 0x00000100 +#define NV50TCL_NOTIFY 0x00000104 +#define NV50TCL_DMA_NOTIFY 0x00000180 +#define NV50TCL_DMA_UNK0(x) (0x00000184+((x)*4)) +#define NV50TCL_DMA_UNK0__SIZE 0x0000000b +#define NV50TCL_DMA_UNK1(x) (0x000001c0+((x)*4)) +#define NV50TCL_DMA_UNK1__SIZE 0x00000008 +#define NV50TCL_RT_ADDRESS_HIGH(x) (0x00000200+((x)*32)) +#define NV50TCL_RT_ADDRESS_HIGH__SIZE 0x00000008 +#define NV50TCL_RT_ADDRESS_LOW(x) (0x00000204+((x)*32)) +#define NV50TCL_RT_ADDRESS_LOW__SIZE 0x00000008 +#define NV50TCL_RT_FORMAT(x) (0x00000208+((x)*32)) +#define NV50TCL_RT_FORMAT__SIZE 0x00000008 +#define NV50TCL_RT_FORMAT_32BPP 0x000000cf +#define NV50TCL_RT_FORMAT_24BPP 0x000000e6 +#define NV50TCL_RT_FORMAT_16BPP 0x000000e8 +#define NV50TCL_RT_FORMAT_8BPP 0x000000f3 +#define NV50TCL_RT_FORMAT_15BPP 0x000000f8 +#define NV50TCL_RT_TILE_UNK(x) (0x0000020c+((x)*32)) +#define NV50TCL_RT_TILE_UNK__SIZE 0x00000008 +#define NV50TCL_RT_UNK4(x) (0x00000210+((x)*32)) +#define NV50TCL_RT_UNK4__SIZE 0x00000008 +#define NV50TCL_VTX_ATTR_1F(x) (0x00000300+((x)*4)) +#define NV50TCL_VTX_ATTR_1F__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2F_X(x) (0x00000380+((x)*8)) +#define NV50TCL_VTX_ATTR_2F_X__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2F_Y(x) (0x00000384+((x)*8)) +#define NV50TCL_VTX_ATTR_2F_Y__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_X(x) (0x00000400+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_X__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_Y(x) (0x00000404+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_Y__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_Z(x) (0x00000408+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_Z__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_3F_W(x) 
(0x0000040c+((x)*16)) +#define NV50TCL_VTX_ATTR_3F_W__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_X(x) (0x00000500+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_X__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_Y(x) (0x00000504+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_Y__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_Z(x) (0x00000508+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_Z__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4F_W(x) (0x0000050c+((x)*16)) +#define NV50TCL_VTX_ATTR_4F_W__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2I(x) (0x00000680+((x)*4)) +#define NV50TCL_VTX_ATTR_2I__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_2I_X_SHIFT 0 +#define NV50TCL_VTX_ATTR_2I_X_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_2I_Y_SHIFT 16 +#define NV50TCL_VTX_ATTR_2I_Y_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4I_0(x) (0x00000700+((x)*8)) +#define NV50TCL_VTX_ATTR_4I_0__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4I_0_X_SHIFT 0 +#define NV50TCL_VTX_ATTR_4I_0_X_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4I_0_Y_SHIFT 16 +#define NV50TCL_VTX_ATTR_4I_0_Y_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4I_1(x) (0x00000704+((x)*8)) +#define NV50TCL_VTX_ATTR_4I_1__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4I_1_Z_SHIFT 0 +#define NV50TCL_VTX_ATTR_4I_1_Z_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4I_1_W_SHIFT 16 +#define NV50TCL_VTX_ATTR_4I_1_W_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4NI_0(x) (0x00000780+((x)*8)) +#define NV50TCL_VTX_ATTR_4NI_0__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4NI_0_X_SHIFT 0 +#define NV50TCL_VTX_ATTR_4NI_0_X_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4NI_0_Y_SHIFT 16 +#define NV50TCL_VTX_ATTR_4NI_0_Y_MASK 0xffff0000 +#define NV50TCL_VTX_ATTR_4NI_1(x) (0x00000784+((x)*8)) +#define NV50TCL_VTX_ATTR_4NI_1__SIZE 0x00000010 +#define NV50TCL_VTX_ATTR_4NI_1_Z_SHIFT 0 +#define NV50TCL_VTX_ATTR_4NI_1_Z_MASK 0x0000ffff +#define NV50TCL_VTX_ATTR_4NI_1_W_SHIFT 16 +#define NV50TCL_VTX_ATTR_4NI_1_W_MASK 0xffff0000 +#define NV50TCL_VERTEX_ARRAY_FORMAT(x) (0x00000900+((x)*16)) +#define NV50TCL_VERTEX_ARRAY_FORMAT__SIZE 0x00000010 +#define NV50TCL_VIEWPORT_UNK0(x) (0x00000a00+((x)*4)) +#define NV50TCL_VIEWPORT_UNK0__SIZE 0x00000003 +#define NV50TCL_VIEWPORT_UNK1(x) (0x00000a0c+((x)*4)) +#define NV50TCL_VIEWPORT_UNK1__SIZE 0x00000003 +#define NV50TCL_VIEWPORT_HORIZ 0x00000c00 +#define NV50TCL_VIEWPORT_HORIZ_X_SHIFT 0 +#define NV50TCL_VIEWPORT_HORIZ_X_MASK 0x0000ffff +#define NV50TCL_VIEWPORT_HORIZ_W_SHIFT 16 +#define NV50TCL_VIEWPORT_HORIZ_W_MASK 0xffff0000 +#define NV50TCL_VIEWPORT_VERT 0x00000c04 +#define NV50TCL_VIEWPORT_VERT_Y_SHIFT 0 +#define NV50TCL_VIEWPORT_VERT_Y_MASK 0x0000ffff +#define NV50TCL_VIEWPORT_VERT_H_SHIFT 16 +#define NV50TCL_VIEWPORT_VERT_H_MASK 0xffff0000 +#define NV50TCL_DEPTH_RANGE_NEAR 0x00000c08 +#define NV50TCL_DEPTH_RANGE_FAR 0x00000c0c +#define NV50TCL_VIEWPORT_CLIP_HORIZ(x) (0x00000d00+((x)*8)) +#define NV50TCL_VIEWPORT_CLIP_HORIZ__SIZE 0x00000008 +#define NV50TCL_VIEWPORT_CLIP_VERT(x) (0x00000d04+((x)*8)) +#define NV50TCL_VIEWPORT_CLIP_VERT__SIZE 0x00000008 +#define NV50TCL_VERTEX_BUFFER_FIRST 0x00000d74 +#define NV50TCL_VERTEX_BUFFER_COUNT 0x00000d78 +#define NV50TCL_CLEAR_COLOR(x) (0x00000d80+((x)*4)) +#define NV50TCL_CLEAR_COLOR__SIZE 0x00000004 +#define NV50TCL_CLEAR_DEPTH 0x00000d90 +#define NV50TCL_CLEAR_STENCIL 0x00000da0 +#define NV50TCL_POLYGON_MODE_FRONT 0x00000dac +#define NV50TCL_POLYGON_MODE_FRONT_POINT 0x00001b00 +#define NV50TCL_POLYGON_MODE_FRONT_LINE 0x00001b01 +#define NV50TCL_POLYGON_MODE_FRONT_FILL 0x00001b02 +#define NV50TCL_POLYGON_MODE_BACK 0x00000db0 +#define 
NV50TCL_POLYGON_MODE_BACK_POINT 0x00001b00 +#define NV50TCL_POLYGON_MODE_BACK_LINE 0x00001b01 +#define NV50TCL_POLYGON_MODE_BACK_FILL 0x00001b02 +#define NV50TCL_POLYGON_SMOOTH_ENABLE 0x00000db4 +#define NV50TCL_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0 +#define NV50TCL_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4 +#define NV50TCL_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8 +#define NV50TCL_SCISSOR_HORIZ 0x00000e04 +#define NV50TCL_SCISSOR_HORIZ_L_SHIFT 0 +#define NV50TCL_SCISSOR_HORIZ_L_MASK 0x0000ffff +#define NV50TCL_SCISSOR_HORIZ_R_SHIFT 16 +#define NV50TCL_SCISSOR_HORIZ_R_MASK 0xffff0000 +#define NV50TCL_SCISSOR_VERT 0x00000e08 +#define NV50TCL_SCISSOR_VERT_T_SHIFT 0 +#define NV50TCL_SCISSOR_VERT_T_MASK 0x0000ffff +#define NV50TCL_SCISSOR_VERT_B_SHIFT 16 +#define NV50TCL_SCISSOR_VERT_B_MASK 0xffff0000 +#define NV50TCL_CB_ADDR 0x00000f00 +#define NV50TCL_CB_ADDR_ID_SHIFT 8 +#define NV50TCL_CB_ADDR_ID_MASK 0xffffff00 +#define NV50TCL_CB_ADDR_BUFFER_SHIFT 0 +#define NV50TCL_CB_ADDR_BUFFER_MASK 0x000000ff +#define NV50TCL_CB_DATA(x) (0x00000f04+((x)*4)) +#define NV50TCL_CB_DATA__SIZE 0x00000010 +#define NV50TCL_STENCIL_FRONT_FUNC_REF 0x00000f54 +#define NV50TCL_STENCIL_FRONT_MASK 0x00000f58 +#define NV50TCL_STENCIL_FRONT_FUNC_MASK 0x00000f5c +#define NV50TCL_GP_ADDRESS_HIGH 0x00000f70 +#define NV50TCL_GP_ADDRESS_LOW 0x00000f74 +#define NV50TCL_VP_ADDRESS_HIGH 0x00000f7c +#define NV50TCL_VP_ADDRESS_LOW 0x00000f80 +#define NV50TCL_FP_ADDRESS_HIGH 0x00000fa4 +#define NV50TCL_FP_ADDRESS_LOW 0x00000fa8 +#define NV50TCL_ZETA_ADDRESS_HIGH 0x00000fe0 +#define NV50TCL_ZETA_ADDRESS_LOW 0x00000fe4 +#define NV50TCL_UNKFF4 0x00000ff4 +#define NV50TCL_UNKFF4_W_SHIFT 16 +#define NV50TCL_UNKFF4_W_MASK 0xffff0000 +#define NV50TCL_UNKFF8 0x00000ff8 +#define NV50TCL_UNKFF8_H_SHIFT 16 +#define NV50TCL_UNKFF8_H_MASK 0xffff0000 +#define NV50TCL_RT_HORIZ(x) (0x00001240+((x)*8)) +#define NV50TCL_RT_HORIZ__SIZE 0x00000008 +#define NV50TCL_RT_VERT(x) (0x00001244+((x)*8)) +#define NV50TCL_RT_VERT__SIZE 0x00000008 +#define NV50TCL_CB_DEF_ADDRESS_HIGH 0x00001280 +#define NV50TCL_CB_DEF_ADDRESS_LOW 0x00001284 +#define NV50TCL_CB_DEF_SET 0x00001288 +#define NV50TCL_CB_DEF_SET_SIZE_SHIFT 0 +#define NV50TCL_CB_DEF_SET_SIZE_MASK 0x0000ffff +#define NV50TCL_CB_DEF_SET_BUFFER_SHIFT 16 +#define NV50TCL_CB_DEF_SET_BUFFER_MASK 0xffff0000 +#define NV50TCL_DEPTH_TEST_ENABLE 0x000012cc +#define NV50TCL_SHADE_MODEL 0x000012d4 +#define NV50TCL_SHADE_MODEL_FLAT 0x00001d00 +#define NV50TCL_SHADE_MODEL_SMOOTH 0x00001d01 +#define NV50TCL_DEPTH_WRITE_ENABLE 0x000012e8 +#define NV50TCL_ALPHA_TEST_ENABLE 0x000012ec +#define NV50TCL_DEPTH_TEST_FUNC 0x0000130c +#define NV50TCL_DEPTH_TEST_FUNC_NEVER 0x00000200 +#define NV50TCL_DEPTH_TEST_FUNC_LESS 0x00000201 +#define NV50TCL_DEPTH_TEST_FUNC_EQUAL 0x00000202 +#define NV50TCL_DEPTH_TEST_FUNC_LEQUAL 0x00000203 +#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_DEPTH_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_DEPTH_TEST_FUNC_GEQUAL 0x00000206 +#define NV50TCL_DEPTH_TEST_FUNC_ALWAYS 0x00000207 +#define NV50TCL_ALPHA_TEST_REF 0x00001310 +#define NV50TCL_ALPHA_TEST_FUNC 0x00001314 +#define NV50TCL_ALPHA_TEST_FUNC_NEVER 0x00000200 +#define NV50TCL_ALPHA_TEST_FUNC_LESS 0x00000201 +#define NV50TCL_ALPHA_TEST_FUNC_EQUAL 0x00000202 +#define NV50TCL_ALPHA_TEST_FUNC_LEQUAL 0x00000203 +#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_ALPHA_TEST_FUNC_GREATER 0x00000204 +#define NV50TCL_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205 +#define 
NV50TCL_ALPHA_TEST_FUNC_GEQUAL 0x00000206 +#define NV50TCL_ALPHA_TEST_FUNC_ALWAYS 0x00000207 +#define NV50TCL_BLEND_COLOR(x) (0x0000131c+((x)*4)) +#define NV50TCL_BLEND_COLOR__SIZE 0x00000004 +#define NV50TCL_BLEND_EQUATION_RGB 0x00001340 +#define NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006 +#define NV50TCL_BLEND_EQUATION_RGB_MIN 0x00008007 +#define NV50TCL_BLEND_EQUATION_RGB_MAX 0x00008008 +#define NV50TCL_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a +#define NV50TCL_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV50TCL_BLEND_FUNC_SRC_RGB 0x00001344 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_SRC_RGB_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_SRC_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_SRC_RGB_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_SRC_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_FUNC_DST_RGB 0x00001348 +#define NV50TCL_BLEND_FUNC_DST_RGB_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_DST_RGB_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_DST_RGB_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_DST_RGB_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_DST_RGB_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_DST_RGB_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_EQUATION_ALPHA 0x0000134c +#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006 +#define NV50TCL_BLEND_EQUATION_ALPHA_MIN 0x00008007 +#define NV50TCL_BLEND_EQUATION_ALPHA_MAX 0x00008008 +#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a +#define NV50TCL_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b +#define NV50TCL_BLEND_FUNC_SRC_ALPHA 0x00001350 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_DST_COLOR 0x00000307 +#define 
NV50TCL_BLEND_FUNC_SRC_ALPHA_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_SRC_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_FUNC_DST_ALPHA 0x00001358 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ZERO 0x00000000 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE 0x00000001 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_COLOR 0x00000300 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_COLOR 0x00000301 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA 0x00000302 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_SRC_ALPHA 0x00000303 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_ALPHA 0x00000304 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_ALPHA 0x00000305 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_DST_COLOR 0x00000306 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_DST_COLOR 0x00000307 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_SRC_ALPHA_SATURATE 0x00000308 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_COLOR 0x00008001 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_COLOR 0x00008002 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_CONSTANT_ALPHA 0x00008003 +#define NV50TCL_BLEND_FUNC_DST_ALPHA_ONE_MINUS_CONSTANT_ALPHA 0x00008004 +#define NV50TCL_BLEND_ENABLE(x) (0x00001360+((x)*4)) +#define NV50TCL_BLEND_ENABLE__SIZE 0x00000008 +#define NV50TCL_STENCIL_BACK_ENABLE 0x00001380 +#define NV50TCL_STENCIL_BACK_OP_FAIL 0x00001384 +#define NV50TCL_STENCIL_BACK_OP_FAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL 0x00001388 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_BACK_OP_ZPASS 0x0000138c +#define NV50TCL_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a +#define NV50TCL_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC 0x00001390 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206 +#define 
NV50TCL_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207 +#define NV50TCL_STENCIL_BACK_FUNC_REF 0x00001394 +#define NV50TCL_STENCIL_BACK_MASK 0x00001398 +#define NV50TCL_STENCIL_BACK_FUNC_MASK 0x0000139c +#define NV50TCL_LINE_WIDTH 0x000013b0 +#define NV50TCL_VP_START_ID 0x0000140c +#define NV50TCL_GP_START_ID 0x00001410 +#define NV50TCL_FP_START_ID 0x00001414 +#define NV50TCL_POINT_SIZE 0x00001518 +#define NV50TCL_TSC_ADDRESS_HIGH 0x0000155c +#define NV50TCL_TSC_ADDRESS_LOW 0x00001560 +#define NV50TCL_POLYGON_OFFSET_FACTOR 0x0000156c +#define NV50TCL_LINE_SMOOTH_ENABLE 0x00001570 +#define NV50TCL_TIC_ADDRESS_HIGH 0x00001574 +#define NV50TCL_TIC_ADDRESS_LOW 0x00001578 +#define NV50TCL_STENCIL_FRONT_ENABLE 0x00001594 +#define NV50TCL_STENCIL_FRONT_OP_FAIL 0x00001598 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL 0x0000159c +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS 0x000015a0 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507 +#define NV50TCL_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC 0x000015a4 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206 +#define NV50TCL_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207 +#define NV50TCL_POLYGON_OFFSET_UNITS 0x000015bc +#define NV50TCL_VERTEX_BEGIN 0x000015dc +#define NV50TCL_VERTEX_BEGIN_POINTS 0x00000000 +#define NV50TCL_VERTEX_BEGIN_LINES 0x00000001 +#define NV50TCL_VERTEX_BEGIN_LINE_LOOP 0x00000002 +#define NV50TCL_VERTEX_BEGIN_LINE_STRIP 0x00000003 +#define NV50TCL_VERTEX_BEGIN_TRIANGLES 0x00000004 +#define NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP 0x00000005 +#define NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN 0x00000006 +#define NV50TCL_VERTEX_BEGIN_QUADS 0x00000007 +#define NV50TCL_VERTEX_BEGIN_QUAD_STRIP 0x00000008 +#define NV50TCL_VERTEX_BEGIN_POLYGON 0x00000009 +#define NV50TCL_VERTEX_END 0x000015e0 +#define NV50TCL_VERTEX_DATA 0x00001640 +#define NV50TCL_VP_ATTR_EN_0 0x00001650 
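NV50TCL_VP_ATTR_EN_0 (and _1 further down) pack one 4-bit component-enable field per vertex-shader input: within each nibble, bit 0 enables X, bit 1 Y, bit 2 Z and bit 3 W, which is what the XNNN/NYNN/.../XYZW tokens that follow spell out combination by combination. A small sketch of how two of those tokens combine, with a hypothetical variable name that is not part of this patch:

    /* slot 0 reads a full xyzw attribute, slot 1 only xy */
    unsigned vp_attr_en_0 = NV50TCL_VP_ATTR_EN_0_0_XYZW
                          | NV50TCL_VP_ATTR_EN_0_1_XYNN;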
+#define NV50TCL_VP_ATTR_EN_0_7_SHIFT 28 +#define NV50TCL_VP_ATTR_EN_0_7_MASK 0xf0000000 +#define NV50TCL_VP_ATTR_EN_0_7_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNNN 0x10000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYNN 0x20000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYNN 0x30000000 +#define NV50TCL_VP_ATTR_EN_0_7_NNZN 0x40000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNZN 0x50000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYZN 0x60000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYZN 0x70000000 +#define NV50TCL_VP_ATTR_EN_0_7_NNNW 0x80000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNNW 0x90000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYNW 0xa0000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYNW 0xb0000000 +#define NV50TCL_VP_ATTR_EN_0_7_NNZW 0xc0000000 +#define NV50TCL_VP_ATTR_EN_0_7_XNZW 0xd0000000 +#define NV50TCL_VP_ATTR_EN_0_7_NYZW 0xe0000000 +#define NV50TCL_VP_ATTR_EN_0_7_XYZW 0xf0000000 +#define NV50TCL_VP_ATTR_EN_0_6_SHIFT 24 +#define NV50TCL_VP_ATTR_EN_0_6_MASK 0x0f000000 +#define NV50TCL_VP_ATTR_EN_0_6_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNNN 0x01000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYNN 0x02000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYNN 0x03000000 +#define NV50TCL_VP_ATTR_EN_0_6_NNZN 0x04000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNZN 0x05000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYZN 0x06000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYZN 0x07000000 +#define NV50TCL_VP_ATTR_EN_0_6_NNNW 0x08000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNNW 0x09000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYNW 0x0a000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYNW 0x0b000000 +#define NV50TCL_VP_ATTR_EN_0_6_NNZW 0x0c000000 +#define NV50TCL_VP_ATTR_EN_0_6_XNZW 0x0d000000 +#define NV50TCL_VP_ATTR_EN_0_6_NYZW 0x0e000000 +#define NV50TCL_VP_ATTR_EN_0_6_XYZW 0x0f000000 +#define NV50TCL_VP_ATTR_EN_0_5_SHIFT 20 +#define NV50TCL_VP_ATTR_EN_0_5_MASK 0x00f00000 +#define NV50TCL_VP_ATTR_EN_0_5_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_5_XNNN 0x00100000 +#define NV50TCL_VP_ATTR_EN_0_5_NYNN 0x00200000 +#define NV50TCL_VP_ATTR_EN_0_5_XYNN 0x00300000 +#define NV50TCL_VP_ATTR_EN_0_5_NNZN 0x00400000 +#define NV50TCL_VP_ATTR_EN_0_5_XNZN 0x00500000 +#define NV50TCL_VP_ATTR_EN_0_5_NYZN 0x00600000 +#define NV50TCL_VP_ATTR_EN_0_5_XYZN 0x00700000 +#define NV50TCL_VP_ATTR_EN_0_5_NNNW 0x00800000 +#define NV50TCL_VP_ATTR_EN_0_5_XNNW 0x00900000 +#define NV50TCL_VP_ATTR_EN_0_5_NYNW 0x00a00000 +#define NV50TCL_VP_ATTR_EN_0_5_XYNW 0x00b00000 +#define NV50TCL_VP_ATTR_EN_0_5_NNZW 0x00c00000 +#define NV50TCL_VP_ATTR_EN_0_5_XNZW 0x00d00000 +#define NV50TCL_VP_ATTR_EN_0_5_NYZW 0x00e00000 +#define NV50TCL_VP_ATTR_EN_0_5_XYZW 0x00f00000 +#define NV50TCL_VP_ATTR_EN_0_4_SHIFT 16 +#define NV50TCL_VP_ATTR_EN_0_4_MASK 0x000f0000 +#define NV50TCL_VP_ATTR_EN_0_4_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_4_XNNN 0x00010000 +#define NV50TCL_VP_ATTR_EN_0_4_NYNN 0x00020000 +#define NV50TCL_VP_ATTR_EN_0_4_XYNN 0x00030000 +#define NV50TCL_VP_ATTR_EN_0_4_NNZN 0x00040000 +#define NV50TCL_VP_ATTR_EN_0_4_XNZN 0x00050000 +#define NV50TCL_VP_ATTR_EN_0_4_NYZN 0x00060000 +#define NV50TCL_VP_ATTR_EN_0_4_XYZN 0x00070000 +#define NV50TCL_VP_ATTR_EN_0_4_NNNW 0x00080000 +#define NV50TCL_VP_ATTR_EN_0_4_XNNW 0x00090000 +#define NV50TCL_VP_ATTR_EN_0_4_NYNW 0x000a0000 +#define NV50TCL_VP_ATTR_EN_0_4_XYNW 0x000b0000 +#define NV50TCL_VP_ATTR_EN_0_4_NNZW 0x000c0000 +#define NV50TCL_VP_ATTR_EN_0_4_XNZW 0x000d0000 +#define NV50TCL_VP_ATTR_EN_0_4_NYZW 0x000e0000 +#define NV50TCL_VP_ATTR_EN_0_4_XYZW 0x000f0000 +#define NV50TCL_VP_ATTR_EN_0_3_SHIFT 12 +#define NV50TCL_VP_ATTR_EN_0_3_MASK 0x0000f000 +#define NV50TCL_VP_ATTR_EN_0_3_NONE 
0x00000000 +#define NV50TCL_VP_ATTR_EN_0_3_XNNN 0x00001000 +#define NV50TCL_VP_ATTR_EN_0_3_NYNN 0x00002000 +#define NV50TCL_VP_ATTR_EN_0_3_XYNN 0x00003000 +#define NV50TCL_VP_ATTR_EN_0_3_NNZN 0x00004000 +#define NV50TCL_VP_ATTR_EN_0_3_XNZN 0x00005000 +#define NV50TCL_VP_ATTR_EN_0_3_NYZN 0x00006000 +#define NV50TCL_VP_ATTR_EN_0_3_XYZN 0x00007000 +#define NV50TCL_VP_ATTR_EN_0_3_NNNW 0x00008000 +#define NV50TCL_VP_ATTR_EN_0_3_XNNW 0x00009000 +#define NV50TCL_VP_ATTR_EN_0_3_NYNW 0x0000a000 +#define NV50TCL_VP_ATTR_EN_0_3_XYNW 0x0000b000 +#define NV50TCL_VP_ATTR_EN_0_3_NNZW 0x0000c000 +#define NV50TCL_VP_ATTR_EN_0_3_XNZW 0x0000d000 +#define NV50TCL_VP_ATTR_EN_0_3_NYZW 0x0000e000 +#define NV50TCL_VP_ATTR_EN_0_3_XYZW 0x0000f000 +#define NV50TCL_VP_ATTR_EN_0_2_SHIFT 8 +#define NV50TCL_VP_ATTR_EN_0_2_MASK 0x00000f00 +#define NV50TCL_VP_ATTR_EN_0_2_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_2_XNNN 0x00000100 +#define NV50TCL_VP_ATTR_EN_0_2_NYNN 0x00000200 +#define NV50TCL_VP_ATTR_EN_0_2_XYNN 0x00000300 +#define NV50TCL_VP_ATTR_EN_0_2_NNZN 0x00000400 +#define NV50TCL_VP_ATTR_EN_0_2_XNZN 0x00000500 +#define NV50TCL_VP_ATTR_EN_0_2_NYZN 0x00000600 +#define NV50TCL_VP_ATTR_EN_0_2_XYZN 0x00000700 +#define NV50TCL_VP_ATTR_EN_0_2_NNNW 0x00000800 +#define NV50TCL_VP_ATTR_EN_0_2_XNNW 0x00000900 +#define NV50TCL_VP_ATTR_EN_0_2_NYNW 0x00000a00 +#define NV50TCL_VP_ATTR_EN_0_2_XYNW 0x00000b00 +#define NV50TCL_VP_ATTR_EN_0_2_NNZW 0x00000c00 +#define NV50TCL_VP_ATTR_EN_0_2_XNZW 0x00000d00 +#define NV50TCL_VP_ATTR_EN_0_2_NYZW 0x00000e00 +#define NV50TCL_VP_ATTR_EN_0_2_XYZW 0x00000f00 +#define NV50TCL_VP_ATTR_EN_0_1_SHIFT 4 +#define NV50TCL_VP_ATTR_EN_0_1_MASK 0x000000f0 +#define NV50TCL_VP_ATTR_EN_0_1_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_1_XNNN 0x00000010 +#define NV50TCL_VP_ATTR_EN_0_1_NYNN 0x00000020 +#define NV50TCL_VP_ATTR_EN_0_1_XYNN 0x00000030 +#define NV50TCL_VP_ATTR_EN_0_1_NNZN 0x00000040 +#define NV50TCL_VP_ATTR_EN_0_1_XNZN 0x00000050 +#define NV50TCL_VP_ATTR_EN_0_1_NYZN 0x00000060 +#define NV50TCL_VP_ATTR_EN_0_1_XYZN 0x00000070 +#define NV50TCL_VP_ATTR_EN_0_1_NNNW 0x00000080 +#define NV50TCL_VP_ATTR_EN_0_1_XNNW 0x00000090 +#define NV50TCL_VP_ATTR_EN_0_1_NYNW 0x000000a0 +#define NV50TCL_VP_ATTR_EN_0_1_XYNW 0x000000b0 +#define NV50TCL_VP_ATTR_EN_0_1_NNZW 0x000000c0 +#define NV50TCL_VP_ATTR_EN_0_1_XNZW 0x000000d0 +#define NV50TCL_VP_ATTR_EN_0_1_NYZW 0x000000e0 +#define NV50TCL_VP_ATTR_EN_0_1_XYZW 0x000000f0 +#define NV50TCL_VP_ATTR_EN_0_0_SHIFT 0 +#define NV50TCL_VP_ATTR_EN_0_0_MASK 0x0000000f +#define NV50TCL_VP_ATTR_EN_0_0_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_0_0_XNNN 0x00000001 +#define NV50TCL_VP_ATTR_EN_0_0_NYNN 0x00000002 +#define NV50TCL_VP_ATTR_EN_0_0_XYNN 0x00000003 +#define NV50TCL_VP_ATTR_EN_0_0_NNZN 0x00000004 +#define NV50TCL_VP_ATTR_EN_0_0_XNZN 0x00000005 +#define NV50TCL_VP_ATTR_EN_0_0_NYZN 0x00000006 +#define NV50TCL_VP_ATTR_EN_0_0_XYZN 0x00000007 +#define NV50TCL_VP_ATTR_EN_0_0_NNNW 0x00000008 +#define NV50TCL_VP_ATTR_EN_0_0_XNNW 0x00000009 +#define NV50TCL_VP_ATTR_EN_0_0_NYNW 0x0000000a +#define NV50TCL_VP_ATTR_EN_0_0_XYNW 0x0000000b +#define NV50TCL_VP_ATTR_EN_0_0_NNZW 0x0000000c +#define NV50TCL_VP_ATTR_EN_0_0_XNZW 0x0000000d +#define NV50TCL_VP_ATTR_EN_0_0_NYZW 0x0000000e +#define NV50TCL_VP_ATTR_EN_0_0_XYZW 0x0000000f +#define NV50TCL_VP_ATTR_EN_1 0x00001654 +#define NV50TCL_VP_ATTR_EN_1_15_SHIFT 28 +#define NV50TCL_VP_ATTR_EN_1_15_MASK 0xf0000000 +#define NV50TCL_VP_ATTR_EN_1_15_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNNN 0x10000000 +#define 
NV50TCL_VP_ATTR_EN_1_15_NYNN 0x20000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYNN 0x30000000 +#define NV50TCL_VP_ATTR_EN_1_15_NNZN 0x40000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNZN 0x50000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYZN 0x60000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYZN 0x70000000 +#define NV50TCL_VP_ATTR_EN_1_15_NNNW 0x80000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNNW 0x90000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYNW 0xa0000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYNW 0xb0000000 +#define NV50TCL_VP_ATTR_EN_1_15_NNZW 0xc0000000 +#define NV50TCL_VP_ATTR_EN_1_15_XNZW 0xd0000000 +#define NV50TCL_VP_ATTR_EN_1_15_NYZW 0xe0000000 +#define NV50TCL_VP_ATTR_EN_1_15_XYZW 0xf0000000 +#define NV50TCL_VP_ATTR_EN_1_14_SHIFT 24 +#define NV50TCL_VP_ATTR_EN_1_14_MASK 0x0f000000 +#define NV50TCL_VP_ATTR_EN_1_14_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNNN 0x01000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYNN 0x02000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYNN 0x03000000 +#define NV50TCL_VP_ATTR_EN_1_14_NNZN 0x04000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNZN 0x05000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYZN 0x06000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYZN 0x07000000 +#define NV50TCL_VP_ATTR_EN_1_14_NNNW 0x08000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNNW 0x09000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYNW 0x0a000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYNW 0x0b000000 +#define NV50TCL_VP_ATTR_EN_1_14_NNZW 0x0c000000 +#define NV50TCL_VP_ATTR_EN_1_14_XNZW 0x0d000000 +#define NV50TCL_VP_ATTR_EN_1_14_NYZW 0x0e000000 +#define NV50TCL_VP_ATTR_EN_1_14_XYZW 0x0f000000 +#define NV50TCL_VP_ATTR_EN_1_13_SHIFT 20 +#define NV50TCL_VP_ATTR_EN_1_13_MASK 0x00f00000 +#define NV50TCL_VP_ATTR_EN_1_13_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_13_XNNN 0x00100000 +#define NV50TCL_VP_ATTR_EN_1_13_NYNN 0x00200000 +#define NV50TCL_VP_ATTR_EN_1_13_XYNN 0x00300000 +#define NV50TCL_VP_ATTR_EN_1_13_NNZN 0x00400000 +#define NV50TCL_VP_ATTR_EN_1_13_XNZN 0x00500000 +#define NV50TCL_VP_ATTR_EN_1_13_NYZN 0x00600000 +#define NV50TCL_VP_ATTR_EN_1_13_XYZN 0x00700000 +#define NV50TCL_VP_ATTR_EN_1_13_NNNW 0x00800000 +#define NV50TCL_VP_ATTR_EN_1_13_XNNW 0x00900000 +#define NV50TCL_VP_ATTR_EN_1_13_NYNW 0x00a00000 +#define NV50TCL_VP_ATTR_EN_1_13_XYNW 0x00b00000 +#define NV50TCL_VP_ATTR_EN_1_13_NNZW 0x00c00000 +#define NV50TCL_VP_ATTR_EN_1_13_XNZW 0x00d00000 +#define NV50TCL_VP_ATTR_EN_1_13_NYZW 0x00e00000 +#define NV50TCL_VP_ATTR_EN_1_13_XYZW 0x00f00000 +#define NV50TCL_VP_ATTR_EN_1_12_SHIFT 16 +#define NV50TCL_VP_ATTR_EN_1_12_MASK 0x000f0000 +#define NV50TCL_VP_ATTR_EN_1_12_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_12_XNNN 0x00010000 +#define NV50TCL_VP_ATTR_EN_1_12_NYNN 0x00020000 +#define NV50TCL_VP_ATTR_EN_1_12_XYNN 0x00030000 +#define NV50TCL_VP_ATTR_EN_1_12_NNZN 0x00040000 +#define NV50TCL_VP_ATTR_EN_1_12_XNZN 0x00050000 +#define NV50TCL_VP_ATTR_EN_1_12_NYZN 0x00060000 +#define NV50TCL_VP_ATTR_EN_1_12_XYZN 0x00070000 +#define NV50TCL_VP_ATTR_EN_1_12_NNNW 0x00080000 +#define NV50TCL_VP_ATTR_EN_1_12_XNNW 0x00090000 +#define NV50TCL_VP_ATTR_EN_1_12_NYNW 0x000a0000 +#define NV50TCL_VP_ATTR_EN_1_12_XYNW 0x000b0000 +#define NV50TCL_VP_ATTR_EN_1_12_NNZW 0x000c0000 +#define NV50TCL_VP_ATTR_EN_1_12_XNZW 0x000d0000 +#define NV50TCL_VP_ATTR_EN_1_12_NYZW 0x000e0000 +#define NV50TCL_VP_ATTR_EN_1_12_XYZW 0x000f0000 +#define NV50TCL_VP_ATTR_EN_1_11_SHIFT 12 +#define NV50TCL_VP_ATTR_EN_1_11_MASK 0x0000f000 +#define NV50TCL_VP_ATTR_EN_1_11_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_11_XNNN 0x00001000 +#define NV50TCL_VP_ATTR_EN_1_11_NYNN 0x00002000 +#define 
NV50TCL_VP_ATTR_EN_1_11_XYNN 0x00003000 +#define NV50TCL_VP_ATTR_EN_1_11_NNZN 0x00004000 +#define NV50TCL_VP_ATTR_EN_1_11_XNZN 0x00005000 +#define NV50TCL_VP_ATTR_EN_1_11_NYZN 0x00006000 +#define NV50TCL_VP_ATTR_EN_1_11_XYZN 0x00007000 +#define NV50TCL_VP_ATTR_EN_1_11_NNNW 0x00008000 +#define NV50TCL_VP_ATTR_EN_1_11_XNNW 0x00009000 +#define NV50TCL_VP_ATTR_EN_1_11_NYNW 0x0000a000 +#define NV50TCL_VP_ATTR_EN_1_11_XYNW 0x0000b000 +#define NV50TCL_VP_ATTR_EN_1_11_NNZW 0x0000c000 +#define NV50TCL_VP_ATTR_EN_1_11_XNZW 0x0000d000 +#define NV50TCL_VP_ATTR_EN_1_11_NYZW 0x0000e000 +#define NV50TCL_VP_ATTR_EN_1_11_XYZW 0x0000f000 +#define NV50TCL_VP_ATTR_EN_1_10_SHIFT 8 +#define NV50TCL_VP_ATTR_EN_1_10_MASK 0x00000f00 +#define NV50TCL_VP_ATTR_EN_1_10_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_10_XNNN 0x00000100 +#define NV50TCL_VP_ATTR_EN_1_10_NYNN 0x00000200 +#define NV50TCL_VP_ATTR_EN_1_10_XYNN 0x00000300 +#define NV50TCL_VP_ATTR_EN_1_10_NNZN 0x00000400 +#define NV50TCL_VP_ATTR_EN_1_10_XNZN 0x00000500 +#define NV50TCL_VP_ATTR_EN_1_10_NYZN 0x00000600 +#define NV50TCL_VP_ATTR_EN_1_10_XYZN 0x00000700 +#define NV50TCL_VP_ATTR_EN_1_10_NNNW 0x00000800 +#define NV50TCL_VP_ATTR_EN_1_10_XNNW 0x00000900 +#define NV50TCL_VP_ATTR_EN_1_10_NYNW 0x00000a00 +#define NV50TCL_VP_ATTR_EN_1_10_XYNW 0x00000b00 +#define NV50TCL_VP_ATTR_EN_1_10_NNZW 0x00000c00 +#define NV50TCL_VP_ATTR_EN_1_10_XNZW 0x00000d00 +#define NV50TCL_VP_ATTR_EN_1_10_NYZW 0x00000e00 +#define NV50TCL_VP_ATTR_EN_1_10_XYZW 0x00000f00 +#define NV50TCL_VP_ATTR_EN_1_9_SHIFT 4 +#define NV50TCL_VP_ATTR_EN_1_9_MASK 0x000000f0 +#define NV50TCL_VP_ATTR_EN_1_9_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_9_XNNN 0x00000010 +#define NV50TCL_VP_ATTR_EN_1_9_NYNN 0x00000020 +#define NV50TCL_VP_ATTR_EN_1_9_XYNN 0x00000030 +#define NV50TCL_VP_ATTR_EN_1_9_NNZN 0x00000040 +#define NV50TCL_VP_ATTR_EN_1_9_XNZN 0x00000050 +#define NV50TCL_VP_ATTR_EN_1_9_NYZN 0x00000060 +#define NV50TCL_VP_ATTR_EN_1_9_XYZN 0x00000070 +#define NV50TCL_VP_ATTR_EN_1_9_NNNW 0x00000080 +#define NV50TCL_VP_ATTR_EN_1_9_XNNW 0x00000090 +#define NV50TCL_VP_ATTR_EN_1_9_NYNW 0x000000a0 +#define NV50TCL_VP_ATTR_EN_1_9_XYNW 0x000000b0 +#define NV50TCL_VP_ATTR_EN_1_9_NNZW 0x000000c0 +#define NV50TCL_VP_ATTR_EN_1_9_XNZW 0x000000d0 +#define NV50TCL_VP_ATTR_EN_1_9_NYZW 0x000000e0 +#define NV50TCL_VP_ATTR_EN_1_9_XYZW 0x000000f0 +#define NV50TCL_VP_ATTR_EN_1_8_SHIFT 0 +#define NV50TCL_VP_ATTR_EN_1_8_MASK 0x0000000f +#define NV50TCL_VP_ATTR_EN_1_8_NONE 0x00000000 +#define NV50TCL_VP_ATTR_EN_1_8_XNNN 0x00000001 +#define NV50TCL_VP_ATTR_EN_1_8_NYNN 0x00000002 +#define NV50TCL_VP_ATTR_EN_1_8_XYNN 0x00000003 +#define NV50TCL_VP_ATTR_EN_1_8_NNZN 0x00000004 +#define NV50TCL_VP_ATTR_EN_1_8_XNZN 0x00000005 +#define NV50TCL_VP_ATTR_EN_1_8_NYZN 0x00000006 +#define NV50TCL_VP_ATTR_EN_1_8_XYZN 0x00000007 +#define NV50TCL_VP_ATTR_EN_1_8_NNNW 0x00000008 +#define NV50TCL_VP_ATTR_EN_1_8_XNNW 0x00000009 +#define NV50TCL_VP_ATTR_EN_1_8_NYNW 0x0000000a +#define NV50TCL_VP_ATTR_EN_1_8_XYNW 0x0000000b +#define NV50TCL_VP_ATTR_EN_1_8_NNZW 0x0000000c +#define NV50TCL_VP_ATTR_EN_1_8_XNZW 0x0000000d +#define NV50TCL_VP_ATTR_EN_1_8_NYZW 0x0000000e +#define NV50TCL_VP_ATTR_EN_1_8_XYZW 0x0000000f +#define NV50TCL_LINE_STIPPLE_ENABLE 0x0000166c +#define NV50TCL_LINE_STIPPLE_PATTERN 0x00001680 +#define NV50TCL_POLYGON_STIPPLE_ENABLE 0x0000168c +#define NV50TCL_VP_REG_HPOS 0x000016bc +#define NV50TCL_VP_REG_HPOS_X_SHIFT 0 +#define NV50TCL_VP_REG_HPOS_X_MASK 0x000000ff +#define NV50TCL_VP_REG_HPOS_Y_SHIFT 8 +#define 
NV50TCL_VP_REG_HPOS_Y_MASK 0x0000ff00 +#define NV50TCL_VP_REG_HPOS_Z_SHIFT 16 +#define NV50TCL_VP_REG_HPOS_Z_MASK 0x00ff0000 +#define NV50TCL_VP_REG_HPOS_W_SHIFT 24 +#define NV50TCL_VP_REG_HPOS_W_MASK 0xff000000 +#define NV50TCL_VP_REG_COL0 0x000016c0 +#define NV50TCL_VP_REG_COL0_X_SHIFT 0 +#define NV50TCL_VP_REG_COL0_X_MASK 0x000000ff +#define NV50TCL_VP_REG_COL0_Y_SHIFT 8 +#define NV50TCL_VP_REG_COL0_Y_MASK 0x0000ff00 +#define NV50TCL_VP_REG_COL0_Z_SHIFT 16 +#define NV50TCL_VP_REG_COL0_Z_MASK 0x00ff0000 +#define NV50TCL_VP_REG_COL0_W_SHIFT 24 +#define NV50TCL_VP_REG_COL0_W_MASK 0xff000000 +#define NV50TCL_POLYGON_STIPPLE_PATTERN(x) (0x00001700+((x)*4)) +#define NV50TCL_POLYGON_STIPPLE_PATTERN__SIZE 0x00000020 +#define NV50TCL_CULL_FACE_ENABLE 0x00001918 +#define NV50TCL_FRONT_FACE 0x0000191c +#define NV50TCL_FRONT_FACE_CW 0x00000900 +#define NV50TCL_FRONT_FACE_CCW 0x00000901 +#define NV50TCL_CULL_FACE 0x00001920 +#define NV50TCL_CULL_FACE_FRONT 0x00000404 +#define NV50TCL_CULL_FACE_BACK 0x00000405 +#define NV50TCL_CULL_FACE_FRONT_AND_BACK 0x00000408 +#define NV50TCL_LOGIC_OP_ENABLE 0x000019c4 +#define NV50TCL_LOGIC_OP 0x000019c8 +#define NV50TCL_LOGIC_OP_CLEAR 0x00001500 +#define NV50TCL_LOGIC_OP_AND 0x00001501 +#define NV50TCL_LOGIC_OP_AND_REVERSE 0x00001502 +#define NV50TCL_LOGIC_OP_COPY 0x00001503 +#define NV50TCL_LOGIC_OP_AND_INVERTED 0x00001504 +#define NV50TCL_LOGIC_OP_NOOP 0x00001505 +#define NV50TCL_LOGIC_OP_XOR 0x00001506 +#define NV50TCL_LOGIC_OP_OR 0x00001507 +#define NV50TCL_LOGIC_OP_NOR 0x00001508 +#define NV50TCL_LOGIC_OP_EQUIV 0x00001509 +#define NV50TCL_LOGIC_OP_INVERT 0x0000150a +#define NV50TCL_LOGIC_OP_OR_REVERSE 0x0000150b +#define NV50TCL_LOGIC_OP_COPY_INVERTED 0x0000150c +#define NV50TCL_LOGIC_OP_OR_INVERTED 0x0000150d +#define NV50TCL_LOGIC_OP_NAND 0x0000150e +#define NV50TCL_LOGIC_OP_SET 0x0000150f +#define NV50TCL_CLEAR_BUFFERS 0x000019d0 +#define NV50TCL_COLOR_MASK(x) (0x00001a00+((x)*4)) +#define NV50TCL_COLOR_MASK__SIZE 0x00000008 +#define NV50TCL_COLOR_MASK_R_SHIFT 0 +#define NV50TCL_COLOR_MASK_R_MASK 0x0000000f +#define NV50TCL_COLOR_MASK_G_SHIFT 4 +#define NV50TCL_COLOR_MASK_G_MASK 0x000000f0 +#define NV50TCL_COLOR_MASK_B_SHIFT 8 +#define NV50TCL_COLOR_MASK_B_MASK 0x00000f00 +#define NV50TCL_COLOR_MASK_A_SHIFT 12 +#define NV50TCL_COLOR_MASK_A_MASK 0x0000f000 + + +#define NV50_COMPUTE 0x000050c0 + +#define NV50_COMPUTE_DMA_UNK0 0x000001a0 +#define NV50_COMPUTE_DMA_STATUS 0x000001a4 +#define NV50_COMPUTE_DMA_UNK1 0x000001b8 +#define NV50_COMPUTE_DMA_UNK2 0x000001bc +#define NV50_COMPUTE_DMA_UNK3 0x000001c0 +#define NV50_COMPUTE_UNK4_HIGH 0x00000210 +#define NV50_COMPUTE_UNK4_LOW 0x00000214 +#define NV50_COMPUTE_UNK5_HIGH 0x00000218 +#define NV50_COMPUTE_UNK5_LOW 0x0000021c +#define NV50_COMPUTE_UNK6_HIGH 0x00000294 +#define NV50_COMPUTE_UNK6_LOW 0x00000298 +#define NV50_COMPUTE_CONST_BASE_HIGH 0x000002a4 +#define NV50_COMPUTE_CONST_BASE_LO 0x000002a8 +#define NV50_COMPUTE_CONST_SIZE_SEG 0x000002ac +#define NV50_COMPUTE_REG_COUNT 0x000002c0 +#define NV50_COMPUTE_STATUS_HIGH 0x00000310 +#define NV50_COMPUTE_STATUS_LOW 0x00000314 +#define NV50_COMPUTE_EXECUTE 0x0000031c +#define NV50_COMPUTE_USER_PARAM_COUNT 0x00000374 +#define NV50_COMPUTE_GRIDDIM_YX 0x000003a4 +#define NV50_COMPUTE_SHARED_SIZE 0x000003a8 +#define NV50_COMPUTE_BLOCKDIM_YX 0x000003ac +#define NV50_COMPUTE_BLOCKDIM_Z 0x000003b0 +#define NV50_COMPUTE_CALL_ADDRESS 0x000003b4 +#define NV50_COMPUTE_GLOBAL_BASE_HIGH(x) (0x00000400+((x)*32)) +#define NV50_COMPUTE_GLOBAL_BASE_HIGH__SIZE 0x00000010 
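NV50TCL_COLOR_MASK(x), defined a little above, reserves one 4-bit field per channel (R, G, B, A) of each render target. A hedged sketch of building an all-channels-enabled word: the shifts come straight from the header, while the assumption that a value of 1 in a channel's nibble means "writes enabled" is not stated by this patch:

    /* hypothetical: write-enable every channel of one render target */
    unsigned colormask = (1 << NV50TCL_COLOR_MASK_R_SHIFT)
                       | (1 << NV50TCL_COLOR_MASK_G_SHIFT)
                       | (1 << NV50TCL_COLOR_MASK_B_SHIFT)
                       | (1 << NV50TCL_COLOR_MASK_A_SHIFT);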
+#define NV50_COMPUTE_GLOBAL_BASE_LOW(x) (0x00000404+((x)*32)) +#define NV50_COMPUTE_GLOBAL_BASE_LOW__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH(x) (0x00000408+((x)*32)) +#define NV50_COMPUTE_GLOBAL_LIMIT_HIGH__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_LIMIT_LOW(x) (0x0000040c+((x)*32)) +#define NV50_COMPUTE_GLOBAL_LIMIT_LOW__SIZE 0x00000010 +#define NV50_COMPUTE_GLOBAL_UNK(x) (0x00000410+((x)*32)) +#define NV50_COMPUTE_GLOBAL_UNK__SIZE 0x00000010 +#define NV50_COMPUTE_USER_PARAM(x) (0x00000600+((x)*4)) +#define NV50_COMPUTE_USER_PARAM__SIZE 0x00000040 + + +#define NV54TCL 0x00008297 + + + +#endif /* NOUVEAU_REG_H */ diff --git a/src/gallium/drivers/nouveau/nouveau_device.h b/src/gallium/drivers/nouveau/nouveau_device.h new file mode 100644 index 0000000000..e25e89fedd --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_device.h @@ -0,0 +1,30 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __NOUVEAU_DEVICE_H__ +#define __NOUVEAU_DEVICE_H__ + +struct nouveau_device { + unsigned chipset; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h new file mode 100644 index 0000000000..ff97aaa9af --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h @@ -0,0 +1,196 @@ +#ifndef __NOUVEAU_GLDEFS_H__ +#define __NOUVEAU_GLDEFS_H__ + +static INLINE unsigned +nvgl_blend_func(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return 0x0000; + case PIPE_BLENDFACTOR_ONE: + return 0x0001; + case PIPE_BLENDFACTOR_SRC_COLOR: + return 0x0300; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return 0x0301; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return 0x0302; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return 0x0303; + case PIPE_BLENDFACTOR_DST_ALPHA: + return 0x0304; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return 0x0305; + case PIPE_BLENDFACTOR_DST_COLOR: + return 0x0306; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return 0x0307; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return 0x0308; + case PIPE_BLENDFACTOR_CONST_COLOR: + return 0x8001; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return 0x8002; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return 0x8003; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return 0x8004; + default: + return 0x0000; + } +} + +static INLINE unsigned +nvgl_blend_eqn(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return 0x8006; + case PIPE_BLEND_MIN: + return 0x8007; + case PIPE_BLEND_MAX: + return 0x8008; + case PIPE_BLEND_SUBTRACT: + return 0x800a; + case PIPE_BLEND_REVERSE_SUBTRACT: + return 0x800b; + default: + return 0x8006; + } +} + +static INLINE unsigned +nvgl_logicop_func(unsigned func) +{ + switch (func) { + case PIPE_LOGICOP_CLEAR: + return 0x1500; + case PIPE_LOGICOP_NOR: + return 0x1508; + case PIPE_LOGICOP_AND_INVERTED: + return 0x1504; + case PIPE_LOGICOP_COPY_INVERTED: + return 0x150c; + case PIPE_LOGICOP_AND_REVERSE: + return 0x1502; + case PIPE_LOGICOP_INVERT: + return 0x150a; + case PIPE_LOGICOP_XOR: + return 0x1506; + case PIPE_LOGICOP_NAND: + return 0x150e; + case PIPE_LOGICOP_AND: + return 0x1501; + case PIPE_LOGICOP_EQUIV: + return 0x1509; + case PIPE_LOGICOP_NOOP: + return 0x1505; + case PIPE_LOGICOP_OR_INVERTED: + return 0x150d; + case PIPE_LOGICOP_COPY: + return 0x1503; + case PIPE_LOGICOP_OR_REVERSE: + return 0x150b; + case PIPE_LOGICOP_OR: + return 0x1507; + case PIPE_LOGICOP_SET: + return 0x150f; + default: + return 0x1505; + } +} + +static INLINE unsigned +nvgl_comparison_op(unsigned op) +{ + switch (op) { + case PIPE_FUNC_NEVER: + return 0x0200; + case PIPE_FUNC_LESS: + return 0x0201; + case PIPE_FUNC_EQUAL: + return 0x0202; + case PIPE_FUNC_LEQUAL: + return 0x0203; + case PIPE_FUNC_GREATER: + return 0x0204; + case PIPE_FUNC_NOTEQUAL: + return 0x0205; + case PIPE_FUNC_GEQUAL: + return 0x0206; + case PIPE_FUNC_ALWAYS: + return 0x0207; + default: + return 0x0207; + } +} + +static INLINE unsigned +nvgl_polygon_mode(unsigned mode) +{ + switch (mode) { + case PIPE_POLYGON_MODE_POINT: + return 0x1b00; + case PIPE_POLYGON_MODE_LINE: + return 0x1b01; + case PIPE_POLYGON_MODE_FILL: + return 0x1b02; + default: + return 0x1b02; + } +} + +static INLINE unsigned +nvgl_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_ZERO: + return 0x0000; + case PIPE_STENCIL_OP_INVERT: + return 0x150a; + case PIPE_STENCIL_OP_KEEP: + return 0x1e00; + case PIPE_STENCIL_OP_REPLACE: + return 0x1e01; + case PIPE_STENCIL_OP_INCR: + return 0x1e02; + case 
PIPE_STENCIL_OP_DECR: + return 0x1e03; + case PIPE_STENCIL_OP_INCR_WRAP: + return 0x8507; + case PIPE_STENCIL_OP_DECR_WRAP: + return 0x8508; + default: + return 0x1e00; + } +} + +static INLINE unsigned +nvgl_primitive(unsigned prim) { + switch (prim) { + case PIPE_PRIM_POINTS: + return 0x0001; + case PIPE_PRIM_LINES: + return 0x0002; + case PIPE_PRIM_LINE_LOOP: + return 0x0003; + case PIPE_PRIM_LINE_STRIP: + return 0x0004; + case PIPE_PRIM_TRIANGLES: + return 0x0005; + case PIPE_PRIM_TRIANGLE_STRIP: + return 0x0006; + case PIPE_PRIM_TRIANGLE_FAN: + return 0x0007; + case PIPE_PRIM_QUADS: + return 0x0008; + case PIPE_PRIM_QUAD_STRIP: + return 0x0009; + case PIPE_PRIM_POLYGON: + return 0x000a; + default: + return 0; + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_grobj.h b/src/gallium/drivers/nouveau/nouveau_grobj.h new file mode 100644 index 0000000000..8f5abf9051 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_grobj.h @@ -0,0 +1,35 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_GROBJ_H__ +#define __NOUVEAU_GROBJ_H__ + +#include "nouveau_channel.h" + +struct nouveau_grobj { + struct nouveau_channel *channel; + int grclass; + uint32_t handle; + int subc; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_notifier.h b/src/gallium/drivers/nouveau/nouveau_notifier.h new file mode 100644 index 0000000000..35adde1e32 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_notifier.h @@ -0,0 +1,43 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_NOTIFIER_H__ +#define __NOUVEAU_NOTIFIER_H__ + +#define NV_NOTIFIER_SIZE 32 +#define NV_NOTIFY_TIME_0 0x00000000 +#define NV_NOTIFY_TIME_1 0x00000004 +#define NV_NOTIFY_RETURN_VALUE 0x00000008 +#define NV_NOTIFY_STATE 0x0000000C +#define NV_NOTIFY_STATE_STATUS_MASK 0xFF000000 +#define NV_NOTIFY_STATE_STATUS_SHIFT 24 +#define NV_NOTIFY_STATE_STATUS_COMPLETED 0x00 +#define NV_NOTIFY_STATE_STATUS_IN_PROCESS 0x01 +#define NV_NOTIFY_STATE_ERROR_CODE_MASK 0x0000FFFF +#define NV_NOTIFY_STATE_ERROR_CODE_SHIFT 0 + +struct nouveau_notifier { + struct nouveau_channel *channel; + uint32_t handle; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h new file mode 100644 index 0000000000..54ef1c1291 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_push.h @@ -0,0 +1,82 @@ +#ifndef __NOUVEAU_PUSH_H__ +#define __NOUVEAU_PUSH_H__ + +#include "nouveau/nouveau_winsys.h" + +#ifndef NOUVEAU_PUSH_CONTEXT +#error undefined push context +#endif + +#define OUT_RING(data) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + (*pc->nvws->channel->pushbuf->cur++) = (data); \ +} while(0) + +#define OUT_RINGp(src,size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \ + pc->nvws->channel->pushbuf->cur += (size); \ +} while(0) + +#define OUT_RINGf(data) do { \ + union { float v; uint32_t u; } c; \ + c.v = (data); \ + OUT_RING(c.u); \ +} while(0) + +#define BEGIN_RING(obj,mthd,size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ + pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \ + OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ + pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#define BEGIN_RING_NI(obj,mthd,size) do { \ + BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ +} while(0) + +#define FIRE_RING(fence) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + pc->nvws->push_flush(pc->nvws, 0, fence); \ +} while(0) + +#define OUT_RELOC(bo,data,flags,vor,tor) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++, \ + (bo), (data), (flags), (vor), (tor)); \ +} while(0) + +/* Raw data + flags depending on FB/TT buffer */ +#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ + OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ +} while(0) + +/* FB/TT object handle */ +#define OUT_RELOCo(bo,flags) do { \ + OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ + pc->nvws->channel->vram->handle, \ + pc->nvws->channel->gart->handle); \ +} while(0) + +/* Low 32-bits of offset */ +#define OUT_RELOCl(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ +} while(0) + +/* High 32-bits of offset */ +#define OUT_RELOCh(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ +} while(0) + +/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ +#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ + pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \ + OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ + (flags), 0, 0); \ + 
pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_pushbuf.h b/src/gallium/drivers/nouveau/nouveau_pushbuf.h new file mode 100644 index 0000000000..1909765098 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_pushbuf.h @@ -0,0 +1,32 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NOUVEAU_PUSHBUF_H__ +#define __NOUVEAU_PUSHBUF_H__ + +struct nouveau_pushbuf { + struct nouveau_channel *channel; + unsigned remaining; + uint32_t *cur; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_resource.h b/src/gallium/drivers/nouveau/nouveau_resource.h new file mode 100644 index 0000000000..1af7961d30 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_resource.h @@ -0,0 +1,37 @@ +/* + * Copyright 2007 Nouveau Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __NOUVEAU_RESOURCE_H__ +#define __NOUVEAU_RESOURCE_H__ + +struct nouveau_resource { + struct nouveau_resource *prev; + struct nouveau_resource *next; + + int in_use; + void *priv; + + unsigned int start; + unsigned int size; +}; + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h new file mode 100644 index 0000000000..729988b095 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -0,0 +1,158 @@ +#ifndef __NOUVEAU_STATEOBJ_H__ +#define __NOUVEAU_STATEOBJ_H__ + +#include "pipe/p_debug.h" + +struct nouveau_stateobj_reloc { + struct pipe_buffer *bo; + + unsigned offset; + unsigned packet; + + unsigned data; + unsigned flags; + unsigned vor; + unsigned tor; +}; + +struct nouveau_stateobj { + int refcount; + + unsigned *push; + struct nouveau_stateobj_reloc *reloc; + + unsigned *cur; + unsigned cur_packet; + unsigned cur_reloc; +}; + +static INLINE struct nouveau_stateobj * +so_new(unsigned push, unsigned reloc) +{ + struct nouveau_stateobj *so; + + so = MALLOC(sizeof(struct nouveau_stateobj)); + so->refcount = 0; + so->push = MALLOC(sizeof(unsigned) * push); + so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + + so->cur = so->push; + so->cur_reloc = so->cur_packet = 0; + + return so; +} + +static INLINE void +so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) +{ + struct nouveau_stateobj *so = *pso; + + if (ref) { + ref->refcount++; + } + + if (so && --so->refcount <= 0) { + free(so->push); + free(so->reloc); + free(so); + } + + *pso = ref; +} + +static INLINE void +so_data(struct nouveau_stateobj *so, unsigned data) +{ + (*so->cur++) = (data); + so->cur_packet += 4; +} + +static INLINE void +so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +{ + so->cur_packet += (4 * size); + while (size--) + (*so->cur++) = (*data++); +} + +static INLINE void +so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, + unsigned mthd, unsigned size) +{ + so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); + so_data(so, (gr->subc << 13) | (size << 18) | mthd); +} + +static INLINE void +so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo, + unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ + struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; + + r->bo = bo; + r->offset = so->cur - so->push; + r->packet = so->cur_packet; + r->data = data; + r->flags = flags; + r->vor = vor; + r->tor = tor; + so_data(so, data); +} + +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr = so->cur - so->push; + + for (i = 0; i < nr; i++) + debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); +} + +static INLINE void +so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + unsigned nr, i; + + nr = so->cur - so->push; + if (pb->remaining < nr) + nvws->push_flush(nvws, nr, NULL); + pb->remaining -= nr; + + memcpy(pb->cur, so->push, nr * 4); + for (i = 0; i < so->cur_reloc; i++) { + struct nouveau_stateobj_reloc *r = &so->reloc[i]; + + nvws->push_reloc(nvws, pb->cur + r->offset, r->bo, + r->data, r->flags, r->vor, r->tor); + } + pb->cur += nr; +} + +static INLINE void +so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + unsigned i; + + if (!so) + return; + + i = so->cur_reloc << 1; + if (nvws->channel->pushbuf->remaining < i) + nvws->push_flush(nvws, i, NULL); 
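+ /* each stored reloc expands to two pushbuf words here (a packet-header marker plus the reloc data), which is why i = cur_reloc << 1 was reserved above */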
+ nvws->channel->pushbuf->remaining -= i; + + for (i = 0; i < so->cur_reloc; i++) { + struct nouveau_stateobj_reloc *r = &so->reloc[i]; + + nvws->push_reloc(nvws, pb->cur++, r->bo, r->packet, + (r->flags & + (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART)) | + NOUVEAU_BO_DUMMY, 0, 0); + nvws->push_reloc(nvws, pb->cur++, r->bo, r->data, + r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor); + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h new file mode 100644 index 0000000000..a10114beab --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -0,0 +1,91 @@ +#ifndef __NOUVEAU_UTIL_H__ +#define __NOUVEAU_UTIL_H__ + +/* Determine how many vertices can be pushed into the command stream. + * Where the remaining space isn't large enough to hold all the vertices, + * split the buffer at primitive boundaries. + * + * Returns a count of vertices that can be rendered, and an index to + * restart drawing at after a flush. + */ +static INLINE unsigned +nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp, + unsigned mode, unsigned start, unsigned count, + unsigned *restart) +{ + int max, adj = 0; + + max = remaining - overhead; + if (max < 0) + return 0; + + max *= vpp; + if (max >= count) + return count; + + switch (mode) { + case PIPE_PRIM_POINTS: + break; + case PIPE_PRIM_LINES: + max = max & ~1; + break; + case PIPE_PRIM_TRIANGLES: + max = max - (max % 3); + break; + case PIPE_PRIM_QUADS: + max = max & ~3; + break; + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + if (max < 2) + max = 0; + adj = 1; + break; + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + if (max < 3) + max = 0; + adj = 2; + break; + case PIPE_PRIM_QUAD_STRIP: + if (max < 4) + max = 0; + adj = 3; + break; + default: + assert(0); + } + + *restart = start + max - adj; + return max; +} + +/* Integer base-2 logarithm, rounded towards zero. 
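+ * e.g. log2i(1) == 0, log2i(8) == 3, log2i(1000) == 9.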
*/ +static INLINE unsigned log2i(unsigned i) +{ + unsigned r = 0; + + if (i & 0xffff0000) { + i >>= 16; + r += 16; + } + if (i & 0x0000ff00) { + i >>= 8; + r += 8; + } + if (i & 0x000000f0) { + i >>= 4; + r += 4; + } + if (i & 0x0000000c) { + i >>= 2; + r += 2; + } + if (i & 0x00000002) { + r += 1; + } + return r; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h new file mode 100644 index 0000000000..5535ebb6a9 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -0,0 +1,98 @@ +#ifndef NOUVEAU_WINSYS_H +#define NOUVEAU_WINSYS_H + +#include <stdint.h> +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_class.h" +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_notifier.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" + +#define NOUVEAU_CAP_HW_VTXBUF (0xbeef0000) +#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001) + +#define NOUVEAU_TEXTURE_USAGE_LINEAR (1 << 16) + +#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16) +#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) + +struct nouveau_winsys { + struct nouveau_context *nv; + + struct nouveau_channel *channel; + + int (*res_init)(struct nouveau_resource **heap, unsigned start, + unsigned size); + int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **); + void (*res_free)(struct nouveau_resource **); + + int (*push_reloc)(struct nouveau_winsys *, void *ptr, + struct pipe_buffer *, uint32_t data, + uint32_t flags, uint32_t vor, uint32_t tor); + int (*push_flush)(struct nouveau_winsys *, unsigned size, + struct pipe_fence_handle **fence); + + int (*grobj_alloc)(struct nouveau_winsys *, int grclass, + struct nouveau_grobj **); + void (*grobj_free)(struct nouveau_grobj **); + + int (*notifier_alloc)(struct nouveau_winsys *, int count, + struct nouveau_notifier **); + void (*notifier_free)(struct nouveau_notifier **); + void (*notifier_reset)(struct nouveau_notifier *, int id); + uint32_t (*notifier_status)(struct nouveau_notifier *, int id); + uint32_t (*notifier_retval)(struct nouveau_notifier *, int id); + int (*notifier_wait)(struct nouveau_notifier *, int id, + int status, int timeout); + + int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *, + unsigned, unsigned, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned); + int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned, unsigned); +}; + +extern struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv04_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv10_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv20_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv30_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv40_create(struct 
pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv50_create(struct pipe_screen *, unsigned pctx_id); + +#endif diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile new file mode 100644 index 0000000000..5ea51a2f42 --- /dev/null +++ b/src/gallium/drivers/nv04/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv04 + +DRIVER_SOURCES = \ + nv04_clear.c \ + nv04_context.c \ + nv04_fragprog.c \ + nv04_fragtex.c \ + nv04_miptree.c \ + nv04_prim_vbuf.c \ + nv04_screen.c \ + nv04_state.c \ + nv04_state_emit.c \ + nv04_surface.c \ + nv04_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c new file mode 100644 index 0000000000..01cacd36fe --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" + +void +nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c new file mode 100644 index 0000000000..9f75253363 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -0,0 +1,106 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + FIRE_RING(fence); +} + +static void +nv04_destroy(struct pipe_context *pipe) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + if (nv04->draw) + draw_destroy(nv04->draw); + + FREE(nv04); +} + +static void +nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +static boolean +nv04_init_hwctx(struct nv04_context *nv04) +{ + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); + OUT_RING(0); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(0); + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(0x40182800); +// OUT_RING(1<<20/*no cull*/); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); +// OUT_RING(0x24|(1<<6)|(1<<8)); + OUT_RING(0x120001a4); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(0x332213a1); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(0x11001010); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(0x0); +// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); +// OUT_RING(SCREEN_OFFSET); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(0xff000000); + + + + FIRE_RING (NULL); + return TRUE; +} + +struct pipe_context * +nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv04_screen *screen = nv04_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv04_context *nv04; + struct nouveau_winsys *nvws = screen->nvws; + + nv04 = CALLOC(1, sizeof(struct nv04_context)); + if (!nv04) + return NULL; + nv04->screen = screen; + nv04->pctx_id = pctx_id; + 
+ nv04->nvws = nvws; + + nv04->pipe.winsys = ws; + nv04->pipe.screen = pscreen; + nv04->pipe.destroy = nv04_destroy; + nv04->pipe.set_edgeflags = nv04_set_edgeflags; + nv04->pipe.draw_arrays = nv04_draw_arrays; + nv04->pipe.draw_elements = nv04_draw_elements; + nv04->pipe.clear = nv04_clear; + nv04->pipe.flush = nv04_flush; + + nv04_init_surface_functions(nv04); + nv04_init_state_functions(nv04); + + nv04->draw = draw_create(); + assert(nv04->draw); + draw_wide_point_threshold(nv04->draw, 0.0); + draw_wide_line_threshold(nv04->draw, 0.0); + draw_enable_line_stipple(nv04->draw, FALSE); + draw_enable_point_sprites(nv04->draw, FALSE); + draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04)); + + nv04_init_hwctx(nv04); + + return &nv04->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h new file mode 100644 index 0000000000..3e6a085270 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -0,0 +1,146 @@ +#ifndef __NV04_CONTEXT_H__ +#define __NV04_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv04_screen *ctx = nv04->screen +#include "nouveau/nouveau_push.h" + +#include "nv04_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#include "nv04_screen.h" + +#define NV04_NEW_VERTPROG (1 << 1) +#define NV04_NEW_FRAGPROG (1 << 2) +#define NV04_NEW_BLEND (1 << 3) +#define NV04_NEW_RAST (1 << 4) +#define NV04_NEW_CONTROL (1 << 5) +#define NV04_NEW_VIEWPORT (1 << 6) +#define NV04_NEW_SAMPLER (1 << 7) + +struct nv04_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv04_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + int chipset; + struct nouveau_notifier *sync; + + uint32_t dirty; + + struct nv04_blend_state *blend; + struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct nv04_fragtex_state fragtex; + struct nv04_rasterizer_state *rast; + struct nv04_depth_stencil_alpha_state *dsa; + + struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[16]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + + struct vertex_info vertex_info; + struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv04_vertex_program *active; + + struct nv04_vertex_program *current; + struct pipe_buffer *constant_buf; + } vertprog; + + struct { + struct nv04_fragment_program *active; + + struct nv04_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_buffers; + unsigned num_vertex_elements; + + struct pipe_viewport_state viewport; +}; + +static INLINE struct nv04_context * +nv04_context(struct pipe_context *pipe) +{ + return (struct nv04_context *)pipe; +} + +extern void nv04_init_state_functions(struct nv04_context *nv04); +extern void 
nv04_init_surface_functions(struct nv04_context *nv04); +extern void nv04_init_miptree_functions(struct pipe_screen *screen); + +/* nv04_clear.c */ +extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv04_draw.c */ +extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04); + +/* nv04_fragprog.c */ +extern void nv04_fragprog_bind(struct nv04_context *, + struct nv04_fragment_program *); +extern void nv04_fragprog_destroy(struct nv04_context *, + struct nv04_fragment_program *); + +/* nv04_fragtex.c */ +extern void nv04_fragtex_bind(struct nv04_context *); + +/* nv04_prim_vbuf.c */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ); + +/* nv04_state.c and friends */ +extern void nv04_emit_hw_state(struct nv04_context *nv04); +extern void nv04_state_tex_update(struct nv04_context *nv04); + +/* nv04_vbo.c */ +extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv04_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c new file mode 100644 index 0000000000..8a2af41fe0 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv04_context.h" + +void +nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp) +{ +} + +void +nv04_fragprog_destroy(struct nv04_context *nv04, + struct nv04_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c new file mode 100644 index 0000000000..21f990fd53 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -0,0 +1,73 @@ +#include "nv04_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf) \ +{ \ + PIPE_FORMAT_##m, \ + NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ +} + +struct nv04_texture_format { + uint pipe; + int format; +}; + +static struct nv04_texture_format +nv04_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(X8R8G8B8_UNORM, X8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM, Y8 ), + _(A8_UNORM, Y8 ), +}; + +static uint32_t +nv04_fragtex_format(uint pipe_format) +{ + struct nv04_texture_format *tf = nv04_texture_formats; + int i; + + for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) { + if (tf->pipe == pipe_format) + return tf->format; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return 0; +} + + +static void +nv04_fragtex_build(struct nv04_context *nv04, int unit) +{ + struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; + struct pipe_texture *pt = &nv04mt->base; + + switch (pt->target) { + case PIPE_TEXTURE_2D: + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + | nv04_fragtex_format(pt->format) + | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height[0]) << 
NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + ; +} + + +void +nv04_fragtex_bind(struct nv04_context *nv04) +{ + nv04_fragtex_build(nv04, 0); +} + diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c new file mode 100644 index 0000000000..0cbb91e187 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -0,0 +1,138 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_miptree_layout(struct nv04_miptree *nv04mt) +{ + struct pipe_texture *pt = &nv04mt->base; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l, f; + + nr_faces = 1; + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + nv04mt->level[l].pitch = pt->width[0] * pt->block.size; + nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; + + nv04mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv04mt->level[l].image_offset[f] = offset; + offset += nv04mt->level[l].pitch * pt->height[l]; + } + } + + nv04mt->total_size = offset; +} + +static struct pipe_texture * +nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv04_miptree *mt; + + mt = MALLOC(sizeof(struct nv04_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + nv04_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv04_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv04_miptree *nv04mt = (struct nv04_miptree *)mt; + int l; + + pipe_buffer_reference(screen, &nv04mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv04mt->level[l].image_offset) + FREE(nv04mt->level[l].image_offset); + } + FREE(nv04mt); + } +} + +static struct pipe_surface * +nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv04mt->level[level].pitch; + ps->refcount = 1; + ps->winsys = pscreen->winsys; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv04mt->level[level].image_offset[face]; + } else { + ps->offset = nv04mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv04_miptree_surface_del(struct 
pipe_screen *pscreen, + struct pipe_surface **psurface) +{ +} + +void +nv04_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv04_miptree_create; + pscreen->texture_release = nv04_miptree_release; + pscreen->get_tex_surface = nv04_miptree_surface_new; + pscreen->tex_surface_release = nv04_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c new file mode 100644 index 0000000000..19979fff79 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -0,0 +1,309 @@ + +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_compiler.h" + +#include "draw/draw_vbuf.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#define VERTEX_SIZE 40 +#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each + +/** + * Primitive renderer for nv04. + */ +struct nv04_vbuf_render { + struct vbuf_render base; + + struct nv04_context *nv04; + + /** Vertex buffer */ + unsigned char* buffer; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Current primitive */ + unsigned prim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct nv04_vbuf_render * +nv04_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct nv04_vbuf_render *)render; +} + + +static const struct vertex_info * +nv04_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + struct nv04_context *nv04 = nv04_render->nv04; + return &nv04->vertex_info; +} + + +static void * +nv04_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE); + assert(!nv04_render->buffer); + + return nv04_render->buffer; +} + + +static boolean +nv04_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + if (prim <= PIPE_PRIM_LINE_STRIP) + return FALSE; + + nv04_render->prim = prim; + return TRUE; +} + +static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(buffer + VERTEX_SIZE * v5,8); + OUT_RING(0xFEDCBA); +} + +static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RING(0xFED); +} + +static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(buffer + VERTEX_SIZE * v3,8); + OUT_RING(0xFECEDC); +} + +static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render 
* render, const ushort * indices, uint nr_indices) +{ + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i; + + for( i=0; i< nr_indices-5; i+=6) + nv04_2triangles(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2], + indices[i+3], + indices[i+4], + indices[i+5] + ); + if (i != nr_indices) + { + nv04_1triangle(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2] + ); + i+=3; + } + if (i != nr_indices) + NOUVEAU_ERR("Houston, we have lost some vertices\n"); +} + +static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i,j; + + for(i = 0; i<nr_indices; i+=14) + { + int numvert = MIN2(16, nr_indices - i); + int numtri = numvert - 2; + if (numvert<3) + break; + + BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + for(j = 0; j<numvert; j++) + OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + + BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + for(j = 0; j<numtri/2; j++ ) + OUT_RING(striptbl[j]); + if (numtri%2) + OUT_RING(striptbl[numtri/2]&0xFFF); + } +} + +static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i,j; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + + for(i = 1; i<nr_indices; i+=14) + { + int numvert=MIN2(15, nr_indices - i); + int numtri=numvert-2; + if (numvert < 3) + break; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + + for(j=0;j<numvert;j++) + OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + + BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + for(j = 0; j<numtri/2; j++) + OUT_RING(fantbl[j]); + if (numtri%2) + OUT_RING(fantbl[numtri/2]&0xFFF); + } +} + +static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i; + + for(i = 0; i < nr_indices; i += 4) + nv04_1quad(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2], + indices[i+3] + ); +} + + +static void +nv04_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + // emit the indices + switch( nv04_render->prim ) + { + case PIPE_PRIM_TRIANGLES: + nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_TRIANGLE_STRIP: + nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_QUADS: + nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices); + break; + default: + NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim); + break; + } +} + + +static void +nv04_vbuf_render_release_vertices( 
struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + free(nv04_render->buffer); + nv04_render->buffer = NULL; +} + + +static void +nv04_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + FREE(nv04_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +nv04_vbuf_render_create( struct nv04_context *nv04 ) +{ + struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render); + + nv04_render->nv04 = nv04; + + nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE; + nv04_render->base.max_indices = 65536; + nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info; + nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices; + nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive; + nv04_render->base.draw = nv04_vbuf_render_draw; + nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices; + nv04_render->base.destroy = nv04_vbuf_render_destroy; + + return &nv04_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv04_vbuf_render_create(nv04); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv04->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c new file mode 100644 index 0000000000..65eacde6b2 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -0,0 +1,214 @@ +#include "pipe/p_screen.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static const char * +nv04_screen_get_name(struct pipe_screen *screen) +{ + struct nv04_screen *nv04screen = nv04_screen(screen); + struct nouveau_device *dev = nv04screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv04_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv04_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 1; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 0; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv04_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 0.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; 
+ } +} + +static boolean +nv04_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + +static void +nv04_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv04_screen *screen = nv04_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->fahrenheit); + + FREE(pscreen); +} + +struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen); + unsigned fahrenheit_class = 0, sub3d_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + if (chipset>=0x20) { + fahrenheit_class = 0; + sub3d_class = 0; + } else if (chipset>=0x10) { + fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + sub3d_class = NV10_CONTEXT_SURFACES_3D; + } else { + fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + sub3d_class = NV04_CONTEXT_SURFACES_3D; + } + + if (!fahrenheit_class) { + NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset); + return NULL; + } + + /* 3D object */ + ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return NULL; + } + + /* 3D surface object */ + ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d); + if (ret) { + NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret); + return NULL; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv04_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv04_screen_destroy; + + screen->pipe.get_name = nv04_screen_get_name; + screen->pipe.get_vendor = nv04_screen_get_vendor; + screen->pipe.get_param = nv04_screen_get_param; + screen->pipe.get_paramf = nv04_screen_get_paramf; + + screen->pipe.is_format_supported = nv04_screen_is_format_supported; + + screen->pipe.surface_map = nv04_surface_map; + screen->pipe.surface_unmap = nv04_surface_unmap; + + nv04_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h new file mode 100644 index 0000000000..99a49cdf7a --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.h @@ -0,0 +1,25 @@ +#ifndef __NV04_SCREEN_H__ 
+#define __NV04_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv04_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + unsigned chipset; + + /* HW graphics objects */ + struct nouveau_grobj *fahrenheit; + struct nouveau_grobj *context_surfaces_3d; + struct nouveau_notifier *sync; + +}; + +static INLINE struct nv04_screen * +nv04_screen(struct pipe_screen *screen) +{ + return (struct nv04_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c new file mode 100644 index 0000000000..ff1933b550 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -0,0 +1,495 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +static void * +nv04_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv04_blend_state *cb; + + cb = MALLOC(sizeof(struct nv04_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + + return (void *)cb; +} + +static void +nv04_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->blend = (struct nv04_blend_state*)blend; + + nv04->dirty |= NV04_NEW_BLEND; +} + +static void +nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + } + return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; +} + +static void * +nv04_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + + struct nv04_sampler_state *ss; + uint32_t filter = 0; + + ss = MALLOC(sizeof(struct nv04_sampler_state)); + + ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + + if (cso->max_anisotropy > 1.0) { + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case 
PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ss->filter = filter; + + return (void *)ss; +} + +static void +nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv04_context *nv04 = nv04_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv04->sampler[unit] = sampler[unit]; + nv04->dirty_samplers |= (1 << unit); + } +} + +static void +nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv04_context *nv04 = nv04_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv04->tex_miptree[unit] = (struct nv04_miptree *)miptree[unit]; + nv04->dirty_samplers |= (1 << unit); + } +} + +static void * +nv04_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv04_rasterizer_state *rs; + + /*XXX: ignored: + * scissor + * points/lines (no hw support, emulated with tris in gallium) + */ + rs = MALLOC(sizeof(struct nv04_rasterizer_state)); + + rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + + return (void *)rs; +} + +static void +nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->rast = (struct nv04_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv04->draw, (nv04->rast ? nv04->rast->templ : NULL)); + + nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND; +} + +static void +nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static INLINE uint32_t nv04_compare_func(uint32_t f) +{ + switch ( f ) { + case PIPE_FUNC_NEVER: return 1; + case PIPE_FUNC_LESS: return 2; + case PIPE_FUNC_EQUAL: return 3; + case PIPE_FUNC_LEQUAL: return 4; + case PIPE_FUNC_GREATER: return 5; + case PIPE_FUNC_NOTEQUAL: return 6; + case PIPE_FUNC_GEQUAL: return 7; + case PIPE_FUNC_ALWAYS: return 8; + } + NOUVEAU_MSG("Unable to find the function\n"); + return 0; +} + +static void * +nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv04_depth_stencil_alpha_state *hw; + + hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); + + hw->control = float_to_ubyte(cso->alpha.ref); + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? 
(1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; + hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + + return (void *)hw; +} + +static void +nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->dsa = hwcso; + nv04->dirty |= NV04_NEW_CONTROL; +} + +static void +nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv04_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + return draw_create_vertex_shader(nv04->draw, templ); +} + +static void +nv04_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); + + nv04->dirty |= NV04_NEW_VERTPROG; +} + +static void +nv04_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv04_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv04_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv04_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + return (void *)fp; +} + +static void +nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = hwcso; + + nv04->fragprog.current = fp; + nv04->dirty |= NV04_NEW_FRAGPROG; +} + +static void +nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = hwcso; + + nv04_fragprog_destroy(nv04, fp); + free((void*)fp->pipe.tokens); + free(fp); +} + +static void +nv04_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ +} + +static void +nv04_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv04->vertprog.constant_buf = buf->buffer; + nv04->dirty |= NV04_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv04->fragprog.constant_buf = buf->buffer; + nv04->dirty |= NV04_NEW_FRAGPROG; + } +} + +static void +nv04_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct pipe_surface *rt, *zeta; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == 
fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format = 0x108; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format = 0x103; + break; + default: + assert(0); + } + + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(rt_format); + + /* FIXME pitches have to be aligned ! */ + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(rt->stride|(zeta->stride<<16)); + OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + if (fb->zsbuf) { + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } +} + +static void +nv04_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv04_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ +/* struct nv04_context *nv04 = nv04_context(pipe); + + // XXX + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void +nv04_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->viewport = *viewport; + + draw_set_viewport_state(nv04->draw, &nv04->viewport); +} + +static void +nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + memcpy(nv04->vertex_buffer, buffers, count * sizeof(buffers[0])); + nv04->num_vertex_buffers = count; + + draw_set_vertex_buffers(nv04->draw, count, buffers); +} + +static void +nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *elements) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + nv04->num_vertex_elements = count; + draw_set_vertex_elements(nv04->draw, count, elements); +} + +void +nv04_init_state_functions(struct nv04_context *nv04) +{ + nv04->pipe.create_blend_state = nv04_blend_state_create; + nv04->pipe.bind_blend_state = nv04_blend_state_bind; + nv04->pipe.delete_blend_state = nv04_blend_state_delete; + + nv04->pipe.create_sampler_state = nv04_sampler_state_create; + nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; + nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; + nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + + nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; + nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; + nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete; + + nv04->pipe.create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create; + nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind; + nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete; + + nv04->pipe.create_vs_state = nv04_vp_state_create; + nv04->pipe.bind_vs_state = nv04_vp_state_bind; + nv04->pipe.delete_vs_state = nv04_vp_state_delete; + + nv04->pipe.create_fs_state = nv04_fp_state_create; + nv04->pipe.bind_fs_state = nv04_fp_state_bind; + nv04->pipe.delete_fs_state = 
nv04_fp_state_delete; + + nv04->pipe.set_blend_color = nv04_set_blend_color; + nv04->pipe.set_clip_state = nv04_set_clip_state; + nv04->pipe.set_constant_buffer = nv04_set_constant_buffer; + nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state; + nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple; + nv04->pipe.set_scissor_state = nv04_set_scissor_state; + nv04->pipe.set_viewport_state = nv04_set_viewport_state; + + nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers; + nv04->pipe.set_vertex_elements = nv04_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h new file mode 100644 index 0000000000..399f750dbe --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -0,0 +1,71 @@ +#ifndef __NV04_STATE_H__ +#define __NV04_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv04_blend_state { + uint32_t b_enable; + uint32_t b_src; + uint32_t b_dst; +}; + +struct nv04_fragtex_state { + uint32_t format; +}; + +struct nv04_sampler_state { + uint32_t filter; + uint32_t format; +}; + +struct nv04_depth_stencil_alpha_state { + uint32_t control; +}; + +struct nv04_rasterizer_state { + uint32_t blend; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv04_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +struct nv04_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv04_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv04_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c new file mode 100644 index 0000000000..0ad40a092e --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -0,0 +1,156 @@ +#include "nv04_context.h" +#include "nv04_state.h" + +static void nv04_vertex_layout(struct pipe_context* pipe) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = nv04->fragprog.current; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fp->info.num_inputs; i++) { + switch (i) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + } + } + draw_compute_vertex_size(&vinfo); +} + +static uint32_t nv04_blend_func(uint32_t f) +{ + switch ( f ) { + case PIPE_BLENDFACTOR_ZERO: return 0x1; + case PIPE_BLENDFACTOR_ONE: return 0x2; + case PIPE_BLENDFACTOR_SRC_COLOR: return 0x3; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return 0x4; + case PIPE_BLENDFACTOR_SRC_ALPHA: return 0x5; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return 0x6; + case PIPE_BLENDFACTOR_DST_ALPHA: return 0x7; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 0x8; + case PIPE_BLENDFACTOR_DST_COLOR: return 0x9; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return 0xA; + case 
PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0xB; + } + NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f); + return 0; +} + +static void nv04_emit_control(struct nv04_context* nv04) +{ + uint32_t control = nv04->dsa->control; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(control); +} + +static void nv04_emit_blend(struct nv04_context* nv04) +{ + uint32_t blend; + + blend=0x4; // texture MODULATE_ALPHA + blend|=0x20; // alpha is MSB + blend|=(2<<6); // flat shading + blend|=(1<<8); // persp correct + blend|=(0<<16); // no fog + blend|=(nv04->blend->b_enable<<20); + blend|=(nv04_blend_func(nv04->blend->b_src)<<24); + blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(blend); +} + +static void nv04_emit_sampler(struct nv04_context *nv04, int unit) +{ + struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; + struct pipe_texture *pt = &nv04mt->base; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(nv04->sampler[unit]->filter); +} + +void +nv04_emit_hw_state(struct nv04_context *nv04) +{ + int i; + + if (nv04->dirty & NV04_NEW_VERTPROG) { + //nv04_vertprog_bind(nv04, nv04->vertprog.current); + nv04->dirty &= ~NV04_NEW_VERTPROG; + } + + if (nv04->dirty & NV04_NEW_FRAGPROG) { + nv04_fragprog_bind(nv04, nv04->fragprog.current); + /*XXX: clear NV04_NEW_FRAGPROG if no new program uploaded */ + nv04->dirty_samplers |= (1<<10); + nv04->dirty_samplers = 0; + } + + if (nv04->dirty & NV04_NEW_CONTROL) { + nv04->dirty &= ~NV04_NEW_CONTROL; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(nv04->dsa->control); + } + + if (nv04->dirty & NV04_NEW_BLEND) { + nv04->dirty &= ~NV04_NEW_BLEND; + + nv04_emit_blend(nv04); + } + + if (nv04->dirty & NV04_NEW_SAMPLER) { + nv04->dirty &= ~NV04_NEW_SAMPLER; + + nv04_emit_sampler(nv04, 0); + } + + if (nv04->dirty & NV04_NEW_VIEWPORT) { + nv04->dirty &= ~NV04_NEW_VIEWPORT; +// nv04_state_emit_viewport(nv04); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. 
Vertex arrays are emitted by nv04_vbo.c + */ + + /* Render target */ +/* BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(rt->stride|(zeta->stride<<16)); + OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + if (fb->zsbuf) { + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + }*/ + + /* Texture images */ + for (i = 0; i < 1; i++) { + if (!(nv04->fp_samplers & (1 << i))) + continue; + struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + } +} + diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c new file mode 100644 index 0000000000..9d9943ed4e --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface.c @@ -0,0 +1,64 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv04_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv04_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nouveau_winsys *nvws = nv04->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nouveau_winsys *nvws = nv04->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv04_init_surface_functions(struct nv04_context *nv04) +{ + nv04->pipe.surface_copy = nv04_surface_copy; + nv04->pipe.surface_fill = nv04_surface_fill; +} diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c new file mode 100644 index 0000000000..91f919d48e --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -0,0 +1,71 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv04_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv04_context *nv04 = nv04_context( pipe ); + struct draw_context *draw = nv04->draw; + unsigned i; + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv04->vertex_buffer[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv04->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + /* draw! */ + draw_arrays(nv04->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv04->vertex_buffer[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv04_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv04_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile new file mode 100644 index 0000000000..4ba7ce586d --- /dev/null +++ b/src/gallium/drivers/nv10/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. 
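+# nv10 Gallium driver Makefile: lists the driver sources below and pulls
+# the shared build rules in from ../../Makefile.template.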
+include $(TOP)/configs/current + +LIBNAME = nv10 + +DRIVER_SOURCES = \ + nv10_clear.c \ + nv10_context.c \ + nv10_fragprog.c \ + nv10_fragtex.c \ + nv10_miptree.c \ + nv10_prim_vbuf.c \ + nv10_screen.c \ + nv10_state.c \ + nv10_state_emit.c \ + nv10_surface.c \ + nv10_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c new file mode 100644 index 0000000000..be7e09cf4b --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" + +void +nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c new file mode 100644 index 0000000000..4eb4ed9185 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -0,0 +1,296 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_flush(nv10->draw); + + FIRE_RING(fence); +} + +static void +nv10_destroy(struct pipe_context *pipe) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + if (nv10->draw) + draw_destroy(nv10->draw); + + FREE(nv10); +} + +static void nv10_init_hwctx(struct nv10_context *nv10) +{ + struct nv10_screen *screen = nv10->screen; + struct nouveau_winsys *nvws = screen->nvws; + int i; + float projectionmatrix[16]; + + BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (screen->sync->handle); + BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); + BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING ((0x7ff<<16)|0x800); + + for (i=1;i<8;i++) { + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (0); + } + + BEGIN_RING(celsius, 0x290, 1); + OUT_RING ((0x10<<16)|1); + BEGIN_RING(celsius, 0x3f4, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + if (nv10->screen->celsius->grclass != NV10TCL) { + /* For nv11, nv17 */ + BEGIN_RING(celsius, 0x120, 3); + OUT_RING (0); + OUT_RING (1); + OUT_RING (2); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + } + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + /* Set state */ + BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (0x207); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (0x30141010); + OUT_RING (0); + 
OUT_RING (0x20040000); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0x00000c00); + OUT_RING (0x18000000); + OUT_RING (0x300e0300); + OUT_RING (0x0c091c80); + + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (1); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0x8006); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (0xff); + OUT_RING (0x207); + OUT_RING (0); + OUT_RING (0xff); + OUT_RING (0x1e00); + OUT_RING (0x1e00); + OUT_RING (0x1e00); + OUT_RING (0x1d01); + BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (0x201); + BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (8); + BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (8); + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (0x1b02); + OUT_RING (0x1b02); + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (0x405); + OUT_RING (0x901); + BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + for (i=0;i<8;i++) { + OUT_RING (0); + } + BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (0x3fc00000); /* -1.50 */ + OUT_RING (0xbdb8aa0a); /* -0.09 */ + OUT_RING (0); /* 0.00 */ + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (0x802); + OUT_RING (2); + /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when + * using texturing, except when using the texture matrix + */ + BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (6); + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (0x01010101); + + /* Set vertex component */ + BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (0.0); + 
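+	/* The writes above load defaults for the fixed-function vertex
+	 * attributes (primary/secondary color, normal, both texcoord sets
+	 * and fog).  Edge flags, an identity projection matrix, the depth
+	 * range and the viewport scale/bias are programmed below. */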
BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (1); + + memset(projectionmatrix, 0, sizeof(projectionmatrix)); + BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + projectionmatrix[0*4+0] = 1.0; + projectionmatrix[1*4+1] = 1.0; + projectionmatrix[2*4+2] = 1.0; + projectionmatrix[3*4+3] = 1.0; + for (i=0;i<16;i++) { + OUT_RINGf (projectionmatrix[i]); + } + + BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (0.0); + OUT_RINGf (16777216.0); + + BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (-2048.0); + OUT_RINGf (-2048.0); + OUT_RINGf (16777215.0 * 0.5); + OUT_RING (0); + + FIRE_RING (NULL); +} + +static void +nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv10_screen *screen = nv10_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv10_context *nv10; + struct nouveau_winsys *nvws = screen->nvws; + + nv10 = CALLOC(1, sizeof(struct nv10_context)); + if (!nv10) + return NULL; + nv10->screen = screen; + nv10->pctx_id = pctx_id; + + nv10->nvws = nvws; + + nv10->pipe.winsys = ws; + nv10->pipe.screen = pscreen; + nv10->pipe.destroy = nv10_destroy; + nv10->pipe.set_edgeflags = nv10_set_edgeflags; + nv10->pipe.draw_arrays = nv10_draw_arrays; + nv10->pipe.draw_elements = nv10_draw_elements; + nv10->pipe.clear = nv10_clear; + nv10->pipe.flush = nv10_flush; + + nv10_init_surface_functions(nv10); + nv10_init_state_functions(nv10); + + nv10->draw = draw_create(); + assert(nv10->draw); + draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10)); + + nv10_init_hwctx(nv10); + + return &nv10->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h new file mode 100644 index 0000000000..f3b56de25a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -0,0 +1,153 @@ +#ifndef __NV10_CONTEXT_H__ +#define __NV10_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv10_screen *ctx = nv10->screen +#include "nouveau/nouveau_push.h" + +#include "nv10_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) 
\ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV10_NEW_VERTPROG (1 << 0) +#define NV10_NEW_FRAGPROG (1 << 1) +#define NV10_NEW_VTXARRAYS (1 << 2) +#define NV10_NEW_BLEND (1 << 3) +#define NV10_NEW_BLENDCOL (1 << 4) +#define NV10_NEW_RAST (1 << 5) +#define NV10_NEW_DSA (1 << 6) +#define NV10_NEW_VIEWPORT (1 << 7) +#define NV10_NEW_SCISSOR (1 << 8) +#define NV10_NEW_FRAMEBUFFER (1 << 9) + +#include "nv10_screen.h" + +struct nv10_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv10_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + uint32_t dirty; + + struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + uint32_t lma_offset; + + struct nv10_blend_state *blend; + struct pipe_blend_color *blend_color; + struct nv10_rasterizer_state *rast; + struct nv10_depth_stencil_alpha_state *dsa; + struct pipe_viewport_state *viewport; + struct pipe_scissor_state *scissor; + struct pipe_framebuffer_state *framebuffer; + + //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + float *constbuf[PIPE_SHADER_TYPES][32][4]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + + struct vertex_info vertex_info; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[2]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + +/* struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv10_vertex_program *active; + + struct nv10_vertex_program *current; + } vertprog; +*/ + struct { + struct nv10_fragment_program *active; + + struct nv10_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +}; + +static INLINE struct nv10_context * +nv10_context(struct pipe_context *pipe) +{ + return (struct nv10_context *)pipe; +} + +extern void nv10_init_state_functions(struct nv10_context *nv10); +extern void nv10_init_surface_functions(struct nv10_context *nv10); + +extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv10_clear.c */ +extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv10_draw.c */ +extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10); + +/* nv10_fragprog.c */ +extern void nv10_fragprog_bind(struct nv10_context *, + struct nv10_fragment_program *); +extern void nv10_fragprog_destroy(struct nv10_context *, + struct nv10_fragment_program *); + +/* nv10_fragtex.c */ +extern void nv10_fragtex_bind(struct nv10_context *); + +/* nv10_prim_vbuf.c */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ); +extern void nv10_vtxbuf_bind(struct nv10_context* nv10); + +/* nv10_state.c and friends */ +extern void nv10_emit_hw_state(struct nv10_context *nv10); +extern void nv10_state_tex_update(struct nv10_context *nv10); + +/* nv10_vbo.c */ +extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv10_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c 
b/src/gallium/drivers/nv10/nv10_fragprog.c new file mode 100644 index 0000000000..698db5a16a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv10_context.h" + +void +nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp) +{ +} + +void +nv10_fragprog_destroy(struct nv10_context *nv10, + struct nv10_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c new file mode 100644 index 0000000000..27f2f87584 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -0,0 +1,124 @@ +#include "nv10_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV10TCL_TX_FORMAT_FORMAT_##tf, \ +} + +struct nv10_texture_format { + boolean defined; + uint pipe; + int format; +}; + +static struct nv10_texture_format +nv10_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM , L8 ), + _(A8_UNORM , A8 ), + _(A8L8_UNORM , A8L8 ), +// _(RGB_DXT1 , DXT1, ), +// _(RGBA_DXT1 , DXT1, ), +// _(RGBA_DXT3 , DXT3, ), +// _(RGBA_DXT5 , DXT5, ), + {}, +}; + +static struct nv10_texture_format * +nv10_fragtex_format(uint pipe_format) +{ + struct nv10_texture_format *tf = nv10_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv10_fragtex_build(struct nv10_context *nv10, int unit) +{ +#if 0 + struct nv10_sampler_state *ps = nv10->tex_sampler[unit]; + struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; + struct pipe_texture *pt = &nv10mt->base; + struct nv10_texture_format *tf; + uint32_t txf, txs, txp; + + tf = nv10_fragtex_format(pt->format); + if (!tf || !tf->defined) { + NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); + return; + } + + txf = tf->format << 8; + txf |= (pt->last_level + 1) << 16; + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= 8; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV10TCL_TX_FORMAT_CUBE_MAP; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= (2<<4); + break; + case PIPE_TEXTURE_1D: + txf |= (1<<4); + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (ps->wrap); + OUT_RING (0x40000000); /* enable */ + OUT_RING (txs); + OUT_RING (ps->filt | 0x2000 /* magic */); + OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING (ps->bcol); +#endif +} + +void +nv10_fragtex_bind(struct nv10_context *nv10) +{ +#if 0 + struct nv10_fragment_program *fp = nv10->fragprog.active; + unsigned samplers, unit; + + samplers = nv10->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv10->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv10_fragtex_build(nv10, unit); + } + + nv10->fp_samplers = 
fp->samplers; +#endif +} + diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c new file mode 100644 index 0000000000..943f9e21e9 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -0,0 +1,149 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_miptree_layout(struct nv10_miptree *nv10mt) +{ + struct pipe_texture *pt = &nv10mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (swizzled) + nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; + else + nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; + nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; + + nv10mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv10mt->level[l].image_offset[f] = offset; + offset += nv10mt->level[l].pitch * pt->height[l]; + } + } + + nv10mt->total_size = offset; +} + +static struct pipe_texture * +nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *mt; + + mt = MALLOC(sizeof(struct nv10_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + nv10_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt; + int l; + + pipe_buffer_reference(screen, &nv10mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv10mt->level[l].image_offset) + FREE(nv10mt->level[l].image_offset); + } + FREE(nv10mt); + } +} + +static void +nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, + uint face, uint levels) +{ +} + + +static struct pipe_surface * +nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (!ps) + return NULL; + pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv10mt->level[level].pitch; + ps->refcount = 1; + ps->winsys = screen->winsys; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv10mt->level[level].image_offset[face]; + } else { + ps->offset = nv10mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv10_miptree_surface_release(struct pipe_screen 
*screen, + struct pipe_surface **surface) +{ +} + +void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv10_miptree_create; + pscreen->texture_release = nv10_miptree_release; + pscreen->get_tex_surface = nv10_miptree_surface_get; + pscreen->tex_surface_release = nv10_miptree_surface_release; +} + diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c new file mode 100644 index 0000000000..e7e81d3dff --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "draw/draw_vbuf.h" + +/** + * Primitive renderer for nv10. + */ +struct nv10_vbuf_render { + struct vbuf_render base; + + struct nv10_context *nv10; + + /** Vertex buffer */ + struct pipe_buffer* buffer; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Hardware primitive */ + unsigned hwprim; +}; + + +void nv10_vtxbuf_bind( struct nv10_context* nv10 ) +{ + int i; + for(i = 0; i < 8; i++) { + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1); + OUT_RING(0/*nv10->vtxbuf*/); + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1); + OUT_RING(0/*XXX*/); + } +} + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct nv10_vbuf_render * +nv10_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct nv10_vbuf_render *)render; +} + + +static const struct vertex_info * +nv10_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + + nv10_emit_hw_state(nv10); + + return &nv10->vertex_info; +} + + +static void * +nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + assert(!nv10_render->buffer); + nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); + + nv10->dirty |= NV10_NEW_VTXARRAYS; + + return winsys->buffer_map(winsys, + nv10_render->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); +} + + +static boolean +nv10_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + unsigned hwp = nvgl_primitive(prim); + if (hwp == 0) + return FALSE; + + nv10_render->hwprim = hwp; + return TRUE; +} + + +static void +nv10_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + int push, i; + + nv10_emit_hw_state(nv10); + + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + + BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(nv10_render->hwprim); + + if (nr_indices & 1) { + BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (indices[0]); + indices++; nr_indices--; + } + + while (nr_indices) { + // XXX too big/small ? check the size + push = MIN2(nr_indices, 1200 * 2); + + BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((indices[i+1] << 16) | indices[i]); + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (0); +} + + +static void +nv10_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + struct pipe_screen *pscreen = &nv10->screen->pipe; + + assert(nv10_render->buffer); + winsys->buffer_unmap(winsys, nv10_render->buffer); + pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL); +} + + +static void +nv10_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + FREE(nv10_render); +} + + +/** + * Create a new primitive render. 
+ */ +static struct vbuf_render * +nv10_vbuf_render_create( struct nv10_context *nv10 ) +{ + struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render); + + nv10_render->nv10 = nv10; + + nv10_render->base.max_vertex_buffer_bytes = 16*1024; + nv10_render->base.max_indices = 1024; + nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info; + nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices; + nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive; + nv10_render->base.draw = nv10_vbuf_render_draw; + nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices; + nv10_render->base.destroy = nv10_vbuf_render_destroy; + + return &nv10_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv10_vbuf_render_create(nv10); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv10->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c new file mode 100644 index 0000000000..4d9fbd4b5f --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -0,0 +1,210 @@ +#include "pipe/p_screen.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static const char * +nv10_screen_get_name(struct pipe_screen *screen) +{ + struct nv10_screen *nv10screen = nv10_screen(screen); + struct nouveau_device *dev = nv10screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv10_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv10_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 2; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv10_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 2.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv10_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case 
PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + +static void +nv10_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv10_screen *screen = nv10_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->celsius); + + FREE(pscreen); +} + +struct pipe_screen * +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen); + unsigned celsius_class; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 3D object */ + if (chipset>=0x20) + celsius_class=NV11TCL; + else if (chipset>=0x17) + celsius_class=NV17TCL; + else if (chipset>=0x11) + celsius_class=NV11TCL; + else + celsius_class=NV10TCL; + + if (!celsius_class) { + NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv10_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv10_screen_destroy; + + screen->pipe.get_name = nv10_screen_get_name; + screen->pipe.get_vendor = nv10_screen_get_vendor; + screen->pipe.get_param = nv10_screen_get_param; + screen->pipe.get_paramf = nv10_screen_get_paramf; + + screen->pipe.is_format_supported = nv10_screen_is_format_supported; + + screen->pipe.surface_map = nv10_surface_map; + screen->pipe.surface_unmap = nv10_surface_unmap; + + nv10_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h new file mode 100644 index 0000000000..3f8750a13f --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.h @@ -0,0 +1,22 @@ +#ifndef __NV10_SCREEN_H__ +#define __NV10_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv10_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + /* HW graphics objects */ + struct nouveau_grobj *celsius; + struct nouveau_notifier *sync; +}; + +static INLINE struct nv10_screen * +nv10_screen(struct pipe_screen *screen) +{ + return (struct nv10_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c new file mode 100644 index 0000000000..d2375aa2f6 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -0,0 +1,588 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include 
"pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +static void * +nv10_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv10_blend_state *cb; + + cb = MALLOC(sizeof(struct nv10_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 1 : 0; + + return (void *)cb; +} + +static void +nv10_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->blend = (struct nv10_blend_state*)blend; + + nv10->dirty |= NV10_NEW_BLEND; +} + +static void +nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; + break; + } + + return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT; +} + +static void * +nv10_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv10_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv10_sampler_state)); + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy > 1.0) { + /* no idea, binary driver sets it, works without it.. meh.. 
*/ + ps->wrap |= (1 << 5); + +/* if (cso->max_anisotropy >= 16.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_4X; + } else { + ps->en |= NV10TCL_TX_ENABLE_ANISO_2X; + }*/ + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + +/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + }*/ + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv10_context *nv10 = nv10_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv10->tex_sampler[unit] = sampler[unit]; + nv10->dirty_samplers |= (1 << unit); + } +} + +static void +nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv10_context *nv10 = nv10_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit]; + nv10->dirty_samplers |= (1 << unit); + } +} + +static void * +nv10_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv10_rasterizer_state *rs; + int i; + + /*XXX: 
ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + rs = MALLOC(sizeof(struct nv10_rasterizer_state)); + + rs->templ = cso; + + rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = NV10TCL_FRONT_FACE_CCW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = NV10TCL_FRONT_FACE_CW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CCW) + rs->cull_face = NV10TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV10TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CW) + rs->cull_face = NV10TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV10TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_BOTH: + rs->cull_face_en = 1; + rs->cull_face = NV10TCL_CULL_FACE_FRONT_AND_BACK; + break; + case PIPE_WINDING_NONE: + default: + rs->cull_face_en = 0; + rs->cull_face = 0; + break; + } + + if (cso->point_sprite) { + rs->point_sprite = (1 << 0); + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + rs->point_sprite |= (1 << (8 + i)); + } + } else { + rs->point_sprite = 0; + } + + return (void *)rs; +} + +static void +nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->rast = (struct nv10_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv10->draw, (nv10->rast ? nv10->rast->templ : NULL)); + + nv10->dirty |= NV10_NEW_RAST; +} + +static void +nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv10_depth_stencil_alpha_state *hw; + + hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state)); + + hw->depth.func = nvgl_comparison_op(cso->depth.func); + hw->depth.write_enable = cso->depth.writemask ? 1 : 0; + hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + + hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; + hw->stencil.wmask = cso->stencil[0].write_mask; + hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); + hw->stencil.ref = cso->stencil[0].ref_value; + hw->stencil.vmask = cso->stencil[0].value_mask; + hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); + hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); + hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + + hw->alpha.enabled = cso->alpha.enabled ? 
1 : 0; + hw->alpha.func = nvgl_comparison_op(cso->alpha.func); + hw->alpha.ref = float_to_ubyte(cso->alpha.ref); + + return (void *)hw; +} + +static void +nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa; + + nv10->dirty |= NV10_NEW_DSA; +} + +static void +nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv10_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + return draw_create_vertex_shader(nv10->draw, templ); +} + +static void +nv10_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_bind_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); + + nv10->dirty |= NV10_NEW_VERTPROG; +} + +static void +nv10_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_delete_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv10_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv10_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv10_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(cso->tokens, &fp->info); + + return (void *)fp; +} + +static void +nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_fragment_program *fp = hwcso; + + nv10->fragprog.current = fp; + nv10->dirty |= NV10_NEW_FRAGPROG; +} + +static void +nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_fragment_program *fp = hwcso; + + nv10_fragprog_destroy(nv10, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv10_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->blend_color = (struct pipe_blend_color*)bcol; + + nv10->dirty |= NV10_NEW_BLENDCOL; +} + +static void +nv10_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_set_clip_state(nv10->draw, clip); +} + +static void +nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + if (buf) { + void *mapped; + if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + { + memcpy(nv10->constbuf[shader], mapped, buf->size); + nv10->constbuf_nr[shader] = + buf->size / (4 * sizeof(float)); + ws->buffer_unmap(ws, buf->buffer); + } + } +} + +static void +nv10_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->framebuffer = (struct pipe_framebuffer_state*)fb; + + nv10->dirty |= NV10_NEW_FRAMEBUFFER; +} + +static void +nv10_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv10_set_scissor_state(struct pipe_context *pipe, + const struct 
pipe_scissor_state *s) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->scissor = (struct pipe_scissor_state*)s; + + nv10->dirty |= NV10_NEW_SCISSOR; +} + +static void +nv10_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->viewport = (struct pipe_viewport_state*)vpt; + + draw_set_viewport_state(nv10->draw, nv10->viewport); + + nv10->dirty |= NV10_NEW_VIEWPORT; +} + +static void +nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count); + nv10->dirty |= NV10_NEW_VTXARRAYS; + + draw_set_vertex_buffers(nv10->draw, count, vb); +} + +static void +nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + memcpy(nv10->vtxelt, ve, sizeof(*ve) * count); + nv10->dirty |= NV10_NEW_VTXARRAYS; + + draw_set_vertex_elements(nv10->draw, count, ve); +} + +void +nv10_init_state_functions(struct nv10_context *nv10) +{ + nv10->pipe.create_blend_state = nv10_blend_state_create; + nv10->pipe.bind_blend_state = nv10_blend_state_bind; + nv10->pipe.delete_blend_state = nv10_blend_state_delete; + + nv10->pipe.create_sampler_state = nv10_sampler_state_create; + nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; + nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; + nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + + nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; + nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; + nv10->pipe.delete_rasterizer_state = nv10_rasterizer_state_delete; + + nv10->pipe.create_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_create; + nv10->pipe.bind_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_bind; + nv10->pipe.delete_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_delete; + + nv10->pipe.create_vs_state = nv10_vp_state_create; + nv10->pipe.bind_vs_state = nv10_vp_state_bind; + nv10->pipe.delete_vs_state = nv10_vp_state_delete; + + nv10->pipe.create_fs_state = nv10_fp_state_create; + nv10->pipe.bind_fs_state = nv10_fp_state_bind; + nv10->pipe.delete_fs_state = nv10_fp_state_delete; + + nv10->pipe.set_blend_color = nv10_set_blend_color; + nv10->pipe.set_clip_state = nv10_set_clip_state; + nv10->pipe.set_constant_buffer = nv10_set_constant_buffer; + nv10->pipe.set_framebuffer_state = nv10_set_framebuffer_state; + nv10->pipe.set_polygon_stipple = nv10_set_polygon_stipple; + nv10->pipe.set_scissor_state = nv10_set_scissor_state; + nv10->pipe.set_viewport_state = nv10_set_viewport_state; + + nv10->pipe.set_vertex_buffers = nv10_set_vertex_buffers; + nv10->pipe.set_vertex_elements = nv10_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h new file mode 100644 index 0000000000..3a3fd0d4f4 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -0,0 +1,139 @@ +#ifndef __NV10_STATE_H__ +#define __NV10_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv10_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv10_sampler_state { + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv10_rasterizer_state { + uint32_t 
shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + + uint32_t point_size; + + uint32_t poly_smooth_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + + uint32_t point_sprite; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv10_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv10_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv10_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv10_vertex_program_exec *insns; + unsigned nr_insns; + struct nv10_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv10_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv10_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv10_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + +struct nv10_depth_stencil_alpha_state { + struct { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; + } depth; + + struct { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t vmask; + uint32_t fail; + uint32_t zfail; + uint32_t zpass; + } stencil; + + struct { + uint32_t enabled; + uint32_t func; + uint32_t ref; + } alpha; +}; + +struct nv10_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c new file mode 100644 index 0000000000..46c7e1d753 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -0,0 +1,303 @@ +#include "nv10_context.h" +#include "nv10_state.h" + +static void nv10_state_emit_blend(struct nv10_context* nv10) +{ + struct nv10_blend_state *b = nv10->blend; + + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (b->d_enable); + + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (b->b_enable); + OUT_RING (b->b_srcfunc); + OUT_RING (b->b_dstfunc); + + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (b->c_mask); +} + +static void nv10_state_emit_blend_color(struct nv10_context* nv10) +{ + struct pipe_blend_color *c = nv10->blend_color; + + BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + (float_to_ubyte(c->color[0]) << 16)| + (float_to_ubyte(c->color[1]) << 8) | + (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv10_state_emit_rast(struct nv10_context* nv10) +{ + struct nv10_rasterizer_state *r = nv10->rast; + + BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (r->shade_model); + OUT_RING (r->line_width); + + + BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (r->point_size); + + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (r->poly_mode_front); + OUT_RING (r->poly_mode_back); + + + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (r->cull_face); + OUT_RING (r->front_face); + + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + 
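+	/* a BEGIN_RING() count of 2 writes this method and the next
+	 * sequential one, so the two data words below land on the line-
+	 * and polygon-smooth enables respectively */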
OUT_RING (r->line_smooth_en); + OUT_RING (r->poly_smooth_en); + + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (r->cull_face_en); +} + +static void nv10_state_emit_dsa(struct nv10_context* nv10) +{ + struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (d->depth.func); + + BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (d->depth.write_enable); + + BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (d->depth.test_enable); + +#if 0 + BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (d->stencil.enable); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); +#endif + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (d->alpha.enabled); + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (d->alpha.func); + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (d->alpha.ref); +} + +static void nv10_state_emit_viewport(struct nv10_context* nv10) +{ +} + +static void nv10_state_emit_scissor(struct nv10_context* nv10) +{ + // XXX this is so not working +/* struct pipe_scissor_state *s = nv10->scissor; + BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv10_state_emit_framebuffer(struct nv10_context* nv10) +{ + struct pipe_framebuffer_state* fb = nv10->framebuffer; + struct pipe_surface *rt, *zeta = NULL; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + if (zeta) { + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (zeta->stride << 16)); + } else { + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (rt->stride << 16)); + } + + nv10->rt[0] = rt->buffer; + + if (zeta_format) + { + nv10->zeta = zeta->buffer; + } + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (((h - 1) << 16) | 0 | 0x08000800); +} + +static void nv10_vertex_layout(struct nv10_context *nv10) +{ + struct nv10_fragment_program *fp = nv10->fragprog.current; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, 
src++); + break; + } + } + draw_compute_vertex_size(&vinfo); +} + +void +nv10_emit_hw_state(struct nv10_context *nv10) +{ + int i; + + if (nv10->dirty & NV10_NEW_VERTPROG) { + //nv10_vertprog_bind(nv10, nv10->vertprog.current); + nv10->dirty &= ~NV10_NEW_VERTPROG; + } + + if (nv10->dirty & NV10_NEW_FRAGPROG) { + nv10_fragprog_bind(nv10, nv10->fragprog.current); + /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */ + nv10->dirty_samplers |= (1<<10); + nv10->dirty_samplers = 0; + } + + if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) { + nv10_fragtex_bind(nv10); + nv10->dirty &= ~NV10_NEW_FRAGPROG; + } + + if (nv10->dirty & NV10_NEW_VTXARRAYS) { + nv10->dirty &= ~NV10_NEW_VTXARRAYS; + nv10_vertex_layout(nv10); + nv10_vtxbuf_bind(nv10); + } + + if (nv10->dirty & NV10_NEW_BLEND) { + nv10->dirty &= ~NV10_NEW_BLEND; + nv10_state_emit_blend(nv10); + } + + if (nv10->dirty & NV10_NEW_BLENDCOL) { + nv10->dirty &= ~NV10_NEW_BLENDCOL; + nv10_state_emit_blend_color(nv10); + } + + if (nv10->dirty & NV10_NEW_RAST) { + nv10->dirty &= ~NV10_NEW_RAST; + nv10_state_emit_rast(nv10); + } + + if (nv10->dirty & NV10_NEW_DSA) { + nv10->dirty &= ~NV10_NEW_DSA; + nv10_state_emit_dsa(nv10); + } + + if (nv10->dirty & NV10_NEW_VIEWPORT) { + nv10->dirty &= ~NV10_NEW_VIEWPORT; + nv10_state_emit_viewport(nv10); + } + + if (nv10->dirty & NV10_NEW_SCISSOR) { + nv10->dirty &= ~NV10_NEW_SCISSOR; + nv10_state_emit_scissor(nv10); + } + + if (nv10->dirty & NV10_NEW_FRAMEBUFFER) { + nv10->dirty &= ~NV10_NEW_FRAMEBUFFER; + nv10_state_emit_framebuffer(nv10); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. 
Vertex arrays are emitted by nv10_vbo.c + */ + + /* Render target */ +// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly +// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + if (nv10->zeta) { +// XXX +// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* XXX for when we allocate LMA on nv17 */ +/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(nv10->zeta + lma_offset);*/ + } + + /* Vertex buffer */ + BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + /* Texture images */ + for (i = 0; i < 2; i++) { + if (!(nv10->fp_samplers & (1 << i))) + continue; + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | + NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, + NV10TCL_TX_FORMAT_DMA1); + } +} + diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c new file mode 100644 index 0000000000..be44c7bed5 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_surface.c @@ -0,0 +1,64 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv10_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv10_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nouveau_winsys *nvws = nv10->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nouveau_winsys *nvws = nv10->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv10_init_surface_functions(struct nv10_context *nv10) +{ + nv10->pipe.surface_copy = nv10_surface_copy; + nv10->pipe.surface_fill = nv10_surface_fill; +} diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c new file mode 100644 index 0000000000..d0e788ac03 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -0,0 +1,77 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv10_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv10_context *nv10 = nv10_context( pipe ); + struct draw_context *draw = nv10->draw; + unsigned i; + + nv10_emit_hw_state(nv10); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv10->vtxbuf[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv10->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(draw, + nv10->constbuf[PIPE_SHADER_VERTEX], + nv10->constbuf_nr[PIPE_SHADER_VERTEX]); + + /* draw! */ + draw_arrays(nv10->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv10->vtxbuf[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv10_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile new file mode 100644 index 0000000000..d777fd3d8b --- /dev/null +++ b/src/gallium/drivers/nv20/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv20 + +DRIVER_SOURCES = \ + nv20_clear.c \ + nv20_context.c \ + nv20_fragprog.c \ + nv20_fragtex.c \ + nv20_miptree.c \ + nv20_prim_vbuf.c \ + nv20_screen.c \ + nv20_state.c \ + nv20_state_emit.c \ + nv20_surface.c \ + nv20_vbo.c +# nv20_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c new file mode 100644 index 0000000000..81b6f3e78a --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv20_context.h" + +void +nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c new file mode 100644 index 0000000000..9a17f4af57 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -0,0 +1,419 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static void +nv20_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_flush(nv20->draw); + + FIRE_RING(fence); +} + +static void +nv20_destroy(struct pipe_context *pipe) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + if (nv20->draw) + draw_destroy(nv20->draw); + + FREE(nv20); +} + +static void nv20_init_hwctx(struct nv20_context *nv20) +{ + struct nv20_screen *screen = nv20->screen; + struct nouveau_winsys *nvws = screen->nvws; + int i; + float projectionmatrix[16]; + const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + + BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (screen->sync->handle); + BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); /* TEXTURE1 */ + BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); /* ZETA */ + + BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (0); /* renouveau: beef0351, unique */ + + BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING ((0xfff << 16) | 0x0); + + for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (0); + } + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (0); + + BEGIN_RING(kelvin, 0x17e0, 3); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + + if (is_nv25tcl) { + BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + } else { + BEGIN_RING(kelvin, 0x1e68, 1); + OUT_RING (0x4b800000); /* 16777216.000000 */ + BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + } + + BEGIN_RING(kelvin, 0x290, 1); + OUT_RING ((0x10 << 16) | 1); + BEGIN_RING(kelvin, 0x9fc, 1); + OUT_RING (0); + BEGIN_RING(kelvin, 0x1d80, 1); + OUT_RING (1); + BEGIN_RING(kelvin, 0x9f8, 1); + OUT_RING (4); + 
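+	/* the raw 0x#### methods used throughout this function are
+	 * undocumented state, apparently replayed from renouveau traces
+	 * of the binary driver; their exact meaning is unknown */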
BEGIN_RING(kelvin, 0x17ec, 3); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x1d88, 1); + OUT_RING (3); + + BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (nvws->channel->vram->handle); + } + BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (0); /* renouveau: beef1e10 */ + + BEGIN_RING(kelvin, 0x1e98, 1); + OUT_RING (0); +#if 0 + if (is_nv25tcl) { + BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + + BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + } +#endif + BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (0); + + BEGIN_RING(kelvin, 0x120, 3); + OUT_RING (0); + OUT_RING (1); + OUT_RING (2); + +/* error: ILLEGAL_MTHD, PROTECTION_FAULT + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (0.0); + OUT_RINGf (512.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); +*/ + + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x022c, 2); + OUT_RING (0x280); + OUT_RING (0x07d28000); + } + +/* * illegal method, protection fault + BEGIN_RING(NvSub3D, 0x1c2c, 1); + OUT_RING (0); */ + + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x1da4, 1); + OUT_RING (0); + } + +/* * crashes with illegal method, protection fault + BEGIN_RING(NvSub3D, 0x1c18, 1); + OUT_RING (0x200); */ + + BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING ((0 << 16) | 0); + OUT_RING ((0 << 16) | 0); + + /* *** Set state *** */ + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + + for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { + BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (0); + } + BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (0x30d410d0); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (0x00011101); + BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (0x130e0300); + OUT_RING (0x0c091c80); + BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (0x20c400c0); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (0x035125a0); + OUT_RING (0); + OUT_RING (0x40002000); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (0xffff0000); + + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (0xff); + OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (0); /* 
NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (0); + OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(kelvin, 0x17cc, 1); + OUT_RING (0); + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x1d84, 1); + OUT_RING (1); + } + BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (0x00020000); + BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), + NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); + for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { + OUT_RING(0xffffffff); + } + + BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (0); + OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (1); + BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (0.0); + OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (1); + if (!is_nv25tcl) { + BEGIN_RING(kelvin, 0x1d84, 1); + OUT_RING (3); + } + BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + if (!is_nv25tcl) { + OUT_RING (8); + } else { + OUT_RINGf (1.0); + } + if (!is_nv25tcl) { + BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + } else { + BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, 0x0a1c, 1); + OUT_RING (0x800); + } + BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (8); + BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (NV20TCL_CULL_FACE_BACK); + OUT_RING (NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); + for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { + OUT_RING(0); + } + BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (1.5); + OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (NV20TCL_FOG_MODE_EXP_2); + OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); + BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); + OUT_RING (NV20TCL_ENGINE_FIXED); + + for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { + 
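+		/* leave every texture coordinate matrix disabled */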
BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (0); + } + + BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + for (i = 4; i < 16; ++i) { + OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + } + + BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (1); + BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (0x00010101); + BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (0); + + memset(projectionmatrix, 0, sizeof(projectionmatrix)); + projectionmatrix[0*4+0] = 1.0; + projectionmatrix[1*4+1] = 1.0; + projectionmatrix[2*4+2] = 1.0; + projectionmatrix[3*4+3] = 1.0; + BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + for (i = 0; i < 16; i++) { + OUT_RINGf (projectionmatrix[i]); + } + + BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (0.0); + OUT_RINGf (16777216.0); /* bpp dependant? */ + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4); + OUT_RINGf (-2048.0); + OUT_RINGf (-2048.0); + OUT_RINGf (16777215.0 * 0.5); + OUT_RING (0); + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4); + OUT_RINGf (-2048.0); + OUT_RINGf (-2048.0); + OUT_RINGf (16777215.0 * 0.5); + OUT_RING (0); + + FIRE_RING (NULL); +} + +static void +nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv20_screen *screen = nv20_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv20_context *nv20; + struct nouveau_winsys *nvws = screen->nvws; + + nv20 = CALLOC(1, sizeof(struct nv20_context)); + if (!nv20) + return NULL; + nv20->screen = screen; + nv20->pctx_id = pctx_id; + + nv20->nvws = nvws; + + nv20->pipe.winsys = ws; + nv20->pipe.screen = pscreen; + nv20->pipe.destroy = nv20_destroy; + nv20->pipe.set_edgeflags = nv20_set_edgeflags; + nv20->pipe.draw_arrays = nv20_draw_arrays; + nv20->pipe.draw_elements = nv20_draw_elements; + nv20->pipe.clear = nv20_clear; + nv20->pipe.flush = nv20_flush; + + nv20_init_surface_functions(nv20); + nv20_init_state_functions(nv20); + + nv20->draw = draw_create(); + assert(nv20->draw); + draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20)); + + nv20_init_hwctx(nv20); + + return &nv20->pipe; +} + diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h new file mode 100644 index 0000000000..8ad926db20 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -0,0 +1,153 @@ +#ifndef __NV20_CONTEXT_H__ +#define __NV20_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv20_screen *ctx = nv20->screen +#include "nouveau/nouveau_push.h" + +#include "nv20_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) 
\ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV20_NEW_VERTPROG (1 << 0) +#define NV20_NEW_FRAGPROG (1 << 1) +#define NV20_NEW_VTXARRAYS (1 << 2) +#define NV20_NEW_BLEND (1 << 3) +#define NV20_NEW_BLENDCOL (1 << 4) +#define NV20_NEW_RAST (1 << 5) +#define NV20_NEW_DSA (1 << 6) +#define NV20_NEW_VIEWPORT (1 << 7) +#define NV20_NEW_SCISSOR (1 << 8) +#define NV20_NEW_FRAMEBUFFER (1 << 9) + +#include "nv20_screen.h" + +struct nv20_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv20_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + uint32_t dirty; + + struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + uint32_t lma_offset; + + struct nv20_blend_state *blend; + struct pipe_blend_color *blend_color; + struct nv20_rasterizer_state *rast; + struct nv20_depth_stencil_alpha_state *dsa; + struct pipe_viewport_state *viewport; + struct pipe_scissor_state *scissor; + struct pipe_framebuffer_state *framebuffer; + + //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + float *constbuf[PIPE_SHADER_TYPES][32][4]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + + struct vertex_info vertex_info; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[2]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + +/* struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv20_vertex_program *active; + + struct nv20_vertex_program *current; + } vertprog; +*/ + struct { + struct nv20_fragment_program *active; + + struct nv20_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +}; + +static INLINE struct nv20_context * +nv20_context(struct pipe_context *pipe) +{ + return (struct nv20_context *)pipe; +} + +extern void nv20_init_state_functions(struct nv20_context *nv20); +extern void nv20_init_surface_functions(struct nv20_context *nv20); + +extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv20_clear.c */ +extern void nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv20_draw.c */ +extern struct draw_stage *nv20_draw_render_stage(struct nv20_context *nv20); + +/* nv20_fragprog.c */ +extern void nv20_fragprog_bind(struct nv20_context *, + struct nv20_fragment_program *); +extern void nv20_fragprog_destroy(struct nv20_context *, + struct nv20_fragment_program *); + +/* nv20_fragtex.c */ +extern void nv20_fragtex_bind(struct nv20_context *); + +/* nv20_prim_vbuf.c */ +struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ); +extern void nv20_vtxbuf_bind(struct nv20_context* nv20); + +/* nv20_state.c and friends */ +extern void nv20_emit_hw_state(struct nv20_context *nv20); +extern void nv20_state_tex_update(struct nv20_context *nv20); + +/* nv20_vbo.c */ +extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv20_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c 
b/src/gallium/drivers/nv20/nv20_fragprog.c new file mode 100644 index 0000000000..4f496369dd --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv20_context.h" + +void +nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp) +{ +} + +void +nv20_fragprog_destroy(struct nv20_context *nv20, + struct nv20_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c new file mode 100644 index 0000000000..495a7be912 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -0,0 +1,124 @@ +#include "nv20_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV20TCL_TX_FORMAT_FORMAT_##tf, \ +} + +struct nv20_texture_format { + boolean defined; + uint pipe; + int format; +}; + +static struct nv20_texture_format +nv20_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM , L8 ), + _(A8_UNORM , A8 ), + _(A8L8_UNORM , A8L8 ), +/* _(RGB_DXT1 , DXT1, ), */ +/* _(RGBA_DXT1 , DXT1, ), */ +/* _(RGBA_DXT3 , DXT3, ), */ +/* _(RGBA_DXT5 , DXT5, ), */ + {}, +}; + +static struct nv20_texture_format * +nv20_fragtex_format(uint pipe_format) +{ + struct nv20_texture_format *tf = nv20_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv20_fragtex_build(struct nv20_context *nv20, int unit) +{ +#if 0 + struct nv20_sampler_state *ps = nv20->tex_sampler[unit]; + struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; + struct pipe_texture *pt = &nv20mt->base; + struct nv20_texture_format *tf; + uint32_t txf, txs, txp; + + tf = nv20_fragtex_format(pt->format); + if (!tf || !tf->defined) { + NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); + return; + } + + txf = tf->format << 8; + txf |= (pt->last_level + 1) << 16; + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= 8; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV10TCL_TX_FORMAT_CUBE_MAP; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= (2<<4); + break; + case PIPE_TEXTURE_1D: + txf |= (1<<4); + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (ps->wrap); + OUT_RING (0x40000000); /* enable */ + OUT_RING (txs); + OUT_RING (ps->filt | 0x2000 /* magic */); + OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING (ps->bcol); +#endif +} + +void +nv20_fragtex_bind(struct nv20_context *nv20) +{ +#if 0 + struct nv20_fragment_program *fp = nv20->fragprog.active; + unsigned samplers, unit; + + samplers = nv20->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv20->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv20_fragtex_build(nv20, unit); + } + + 
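+	/* remember which samplers the bound fragment program references,
+	 * so units that fall out of use can be disabled on the next bind */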
nv20->fp_samplers = fp->samplers; +#endif +} + diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c new file mode 100644 index 0000000000..c6106d58c4 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -0,0 +1,156 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static void +nv20_miptree_layout(struct nv20_miptree *nv20mt) +{ + struct pipe_texture *pt = &nv20mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (swizzled) + nv20mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; + else + nv20mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; + nv20mt->level[l].pitch = (nv20mt->level[l].pitch + 63) & ~63; + + nv20mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv20mt->level[l].image_offset[f] = offset; + offset += nv20mt->level[l].pitch * pt->height[l]; + } + } + + nv20mt->total_size = offset; +} + +static struct pipe_texture * +nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv20_miptree *mt; + + mt = MALLOC(sizeof(struct nv20_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + nv20_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv20_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv20_miptree *nv20mt = (struct nv20_miptree *)mt; + int l; + + pipe_buffer_reference(screen, &nv20mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv20mt->level[l].image_offset) + FREE(nv20mt->level[l].image_offset); + } + FREE(nv20mt); + } +} + +static struct pipe_surface * +nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(screen, &ps->buffer, nv20mt->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv20mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->winsys = screen->winsys; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv20mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv20mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv20mt->level[level].image_offset[0]; + } + + 
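+	/* ps->offset now addresses the requested cube face or 3D z-slice
+	 * within the miptree's single backing buffer */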
return ps; +} + +static void +nv20_miptree_surface_release(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(pscreen, &ps->buffer, NULL); + FREE(ps); +} + +void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv20_miptree_create; + pscreen->texture_release = nv20_miptree_release; + pscreen->get_tex_surface = nv20_miptree_surface_get; + pscreen->tex_surface_release = nv20_miptree_surface_release; +} + diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c new file mode 100644 index 0000000000..74540845a8 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -0,0 +1,402 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +#include "draw/draw_vbuf.h" + +/** + * Primitive renderer for nv20. + */ +struct nv20_vbuf_render { + struct vbuf_render base; + + struct nv20_context *nv20; + + /** Vertex buffer in VRAM */ + struct pipe_buffer *pbuffer; + + /** Vertex buffer in normal memory */ + void *mbuffer; + + /** Vertex size in bytes */ + /*unsigned vertex_size;*/ + + /** Hardware primitive */ + unsigned hwprim; +}; + +/** + * Basically a cast wrapper. 
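+ * (valid because the vbuf_render base is the first member of
+ * struct nv20_vbuf_render)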
+ */ +static INLINE struct nv20_vbuf_render * +nv20_vbuf_render(struct vbuf_render *render) +{ + assert(render); + return (struct nv20_vbuf_render *)render; +} + +void nv20_vtxbuf_bind( struct nv20_context* nv20 ) +{ +#if 0 + int i; + for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { + BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(0/*nv20->vtxbuf*/); + BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(0/*XXX*/); + } +#endif +} + +static const struct vertex_info * +nv20_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + struct nv20_context *nv20 = nv20_render->nv20; + + nv20_emit_hw_state(nv20); + + return &nv20->vertex_info; +} + +static void * +nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size) +{ + nv20_render->mbuffer = MALLOC(size); + return nv20_render->mbuffer; +} + +static void * +nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size) +{ + struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; + nv20_render->pbuffer = winsys->buffer_create(winsys, 64, + PIPE_BUFFER_USAGE_VERTEX, size); + return winsys->buffer_map(winsys, + nv20_render->pbuffer, + PIPE_BUFFER_USAGE_CPU_WRITE); +} + +static void * +nv20_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + void *buf; + + assert(!nv20_render->pbuffer); + assert(!nv20_render->mbuffer); + + /* + * For small amount of vertices, don't bother with pipe vertex + * buffer, the data will be passed directly via the fifo. + */ + /* XXX: Pipe vertex buffers don't work. */ + if (0 && size > 16 * 1024) + buf = nv20__allocate_pbuffer(nv20_render, size); + else + buf = nv20__allocate_mbuffer(nv20_render, size); + + if (buf) + nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS; + + return buf; +} + +static boolean +nv20_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + unsigned hwp = nvgl_primitive(prim); + if (hwp == 0) + return FALSE; + + nv20_render->hwprim = hwp; + return TRUE; +} + +static uint32_t +nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) +{ + return (stride << NV20TCL_VTXFMT_STRIDE_SHIFT) | + (fields << NV20TCL_VTXFMT_SIZE_SHIFT) | + (type << NV20TCL_VTXFMT_TYPE_SHIFT); +} + +static unsigned +nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) +{ + uint32_t hwfmt = 0; + unsigned fields; + + switch (type) { + case EMIT_OMIT: + hwfmt = nv20__vtxhwformat(0, 0, 2); + fields = 0; + break; + case EMIT_1F: + hwfmt = nv20__vtxhwformat(4, 1, 2); + fields = 1; + break; + case EMIT_2F: + hwfmt = nv20__vtxhwformat(8, 2, 2); + fields = 2; + break; + case EMIT_3F: + hwfmt = nv20__vtxhwformat(12, 3, 2); + fields = 3; + break; + case EMIT_4F: + hwfmt = nv20__vtxhwformat(16, 4, 2); + fields = 4; + break; + default: + NOUVEAU_ERR("unhandled attrib_emit %d\n", type); + return 0; + } + + BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(hwfmt); + return fields; +} + +static unsigned +nv20__emit_vertex_array_format(struct nv20_context *nv20) +{ + struct vertex_info *vinfo = &nv20->vertex_info; + int hwattr = NV20TCL_VTXFMT__SIZE; + int attr = 0; + unsigned nr_fields = 0; + + while (hwattr-- > 0) { + if (vinfo->hwfmt[0] & (1 << hwattr)) { + nr_fields += nv20__emit_format(nv20, + 
vinfo->attrib[attr].emit, hwattr); + attr++; + } else + nv20__emit_format(nv20, EMIT_OMIT, hwattr); + } + + return nr_fields; +} + +static void +nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, + const ushort *indices, + uint nr_indices) +{ + struct nv20_context *nv20 = nv20_render->nv20; + struct vertex_info *vinfo = &nv20->vertex_info; + unsigned nr_fields; + int max_push; + ubyte *data = nv20_render->mbuffer; + int vsz = 4 * vinfo->size; + + nr_fields = nv20__emit_vertex_array_format(nv20); + + BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(nv20_render->hwprim); + + max_push = 1200 / nr_fields; + while (nr_indices) { + int i; + int push = MIN2(nr_indices, max_push); + + BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + for (i = 0; i < push; i++) { + /* XXX: fixme to handle other than floats? */ + int f = nr_fields; + float *attrv = (float*)&data[indices[i] * vsz]; + while (f-- > 0) + OUT_RINGf(*attrv++); + } + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); +} + +static void +nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, + const ushort *indices, + uint nr_indices) +{ + struct nv20_context *nv20 = nv20_render->nv20; + int push, i; + + NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); + + BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(nv20_render->pbuffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + + BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(nv20_render->hwprim); + + if (nr_indices & 1) { + BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (indices[0]); + indices++; nr_indices--; + } + + while (nr_indices) { + // XXX too big/small ? check the size + push = MIN2(nr_indices, 1200 * 2); + + BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((indices[i+1] << 16) | indices[i]); + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (0); +} + +static void +nv20_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + + nv20_emit_hw_state(nv20_render->nv20); + + if (nv20_render->pbuffer) + nv20__draw_pbuffer(nv20_render, indices, nr_indices); + else if (nv20_render->mbuffer) + nv20__draw_mbuffer(nv20_render, indices, nr_indices); + else + assert(0); +} + + +static void +nv20_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + struct nv20_context *nv20 = nv20_render->nv20; + struct pipe_winsys *winsys = nv20->pipe.winsys; + struct pipe_screen *pscreen = &nv20->screen->pipe; + + if (nv20_render->pbuffer) { + winsys->buffer_unmap(winsys, nv20_render->pbuffer); + pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL); + } else if (nv20_render->mbuffer) { + FREE(nv20_render->mbuffer); + nv20_render->mbuffer = NULL; + } else + assert(0); +} + + +static void +nv20_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + + assert(!nv20_render->pbuffer); + assert(!nv20_render->mbuffer); + + FREE(nv20_render); +} + + +/** + * Create a new primitive render. 
+ */ +static struct vbuf_render * +nv20_vbuf_render_create( struct nv20_context *nv20 ) +{ + struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render); + + nv20_render->nv20 = nv20; + + nv20_render->base.max_vertex_buffer_bytes = 16*1024; + nv20_render->base.max_indices = 1024; + nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info; + nv20_render->base.allocate_vertices = + nv20_vbuf_render_allocate_vertices; + nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive; + nv20_render->base.draw = nv20_vbuf_render_draw; + nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices; + nv20_render->base.destroy = nv20_vbuf_render_destroy; + + return &nv20_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv20_vbuf_render_create(nv20); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv20->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c new file mode 100644 index 0000000000..2ca6e6b149 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -0,0 +1,206 @@ +#include "pipe/p_screen.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static const char * +nv20_screen_get_name(struct pipe_screen *screen) +{ + struct nv20_screen *nv20screen = nv20_screen(screen); + struct nouveau_device *dev = nv20screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv20_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv20_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 2; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv20_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 2.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv20_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case 
PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + + map = ws->buffer_map(ws, surface->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + + ws->buffer_unmap(ws, surface->buffer); +} + +static void +nv20_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv20_screen *screen = nv20_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->kelvin); + + FREE(pscreen); +} + +struct pipe_screen * +nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv20_screen *screen = CALLOC_STRUCT(nv20_screen); + unsigned kelvin_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 3D object */ + if (chipset >= 0x25) + kelvin_class = NV25TCL; + else if (chipset >= 0x20) + kelvin_class = NV20TCL; + + if (!kelvin_class || chipset >= 0x30) { + NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, kelvin_class, &screen->kelvin); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv20_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv20_screen_destroy; + + screen->pipe.get_name = nv20_screen_get_name; + screen->pipe.get_vendor = nv20_screen_get_vendor; + screen->pipe.get_param = nv20_screen_get_param; + screen->pipe.get_paramf = nv20_screen_get_paramf; + + screen->pipe.is_format_supported = nv20_screen_is_format_supported; + + screen->pipe.surface_map = nv20_surface_map; + screen->pipe.surface_unmap = nv20_surface_unmap; + + nv20_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h new file mode 100644 index 0000000000..8f2f2e341d --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_screen.h @@ -0,0 +1,22 @@ +#ifndef __NV20_SCREEN_H__ +#define __NV20_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv20_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + /* HW graphics objects */ + struct nouveau_grobj *kelvin; + struct nouveau_notifier *sync; +}; + +static INLINE struct nv20_screen * +nv20_screen(struct pipe_screen *screen) +{ + return (struct nv20_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c new file mode 100644 index 0000000000..21bde5b81f --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -0,0 +1,581 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + 
+#include "tgsi/tgsi_parse.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +static void * +nv20_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv20_blend_state *cb; + + cb = MALLOC(sizeof(struct nv20_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 1 : 0; + + return (void *)cb; +} + +static void +nv20_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->blend = (struct nv20_blend_state*)blend; + + nv20->dirty |= NV20_NEW_BLEND; +} + +static void +nv20_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV20TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV20TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV20TCL_TX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV20TCL_TX_WRAP_S_REPEAT; + break; + } + + return (ret >> NV20TCL_TX_WRAP_S_SHIFT); +} + +static void * +nv20_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv20_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv20_sampler_state)); + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV20TCL_TX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV20TCL_TX_WRAP_T_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy > 1.0) { + /* no idea, binary driver sets it, works without it.. meh.. 
*/ + ps->wrap |= (1 << 5); + +/* if (cso->max_anisotropy >= 8.0) { + ps->en |= NV20TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV20TCL_TX_ENABLE_ANISO_4X; + } else { + ps->en |= NV20TCL_TX_ENABLE_ANISO_2X; + }*/ + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV20TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV20TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= + NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= + NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= + NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV20TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + +/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + }*/ + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv20_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv20_context *nv20 = nv20_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv20->tex_sampler[unit] = sampler[unit]; + nv20->dirty_samplers |= (1 << unit); + } +} + +static void +nv20_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv20_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv20_context *nv20 = nv20_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv20->tex_miptree[unit] = (struct nv20_miptree *)miptree[unit]; + nv20->dirty_samplers |= (1 << unit); + } +} + +static void * +nv20_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv20_rasterizer_state *rs; + int i; + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + rs = MALLOC(sizeof(struct nv20_rasterizer_state)); + + rs->templ = cso; + + rs->shade_model = cso->flatshade ? 
NV20TCL_SHADE_MODEL_FLAT : + NV20TCL_SHADE_MODEL_SMOOTH; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + + /* XXX: nv20 and nv25 different! */ + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = NV20TCL_FRONT_FACE_CCW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = NV20TCL_FRONT_FACE_CW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CCW) + rs->cull_face = NV20TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV20TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CW) + rs->cull_face = NV20TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV20TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_BOTH: + rs->cull_face_en = 1; + rs->cull_face = NV20TCL_CULL_FACE_FRONT_AND_BACK; + break; + case PIPE_WINDING_NONE: + default: + rs->cull_face_en = 0; + rs->cull_face = 0; + break; + } + + if (cso->point_sprite) { + rs->point_sprite = (1 << 0); + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + rs->point_sprite |= (1 << (8 + i)); + } + } else { + rs->point_sprite = 0; + } + + return (void *)rs; +} + +static void +nv20_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->rast = (struct nv20_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv20->draw, (nv20->rast ? nv20->rast->templ : NULL)); + + nv20->dirty |= NV20_NEW_RAST; +} + +static void +nv20_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv20_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv20_depth_stencil_alpha_state *hw; + + hw = MALLOC(sizeof(struct nv20_depth_stencil_alpha_state)); + + hw->depth.func = nvgl_comparison_op(cso->depth.func); + hw->depth.write_enable = cso->depth.writemask ? 1 : 0; + hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + + hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; + hw->stencil.wmask = cso->stencil[0].write_mask; + hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); + hw->stencil.ref = cso->stencil[0].ref_value; + hw->stencil.vmask = cso->stencil[0].value_mask; + hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); + hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); + hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + + hw->alpha.enabled = cso->alpha.enabled ? 
1 : 0; + hw->alpha.func = nvgl_comparison_op(cso->alpha.func); + hw->alpha.ref = float_to_ubyte(cso->alpha.ref); + + return (void *)hw; +} + +static void +nv20_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->dsa = (struct nv20_depth_stencil_alpha_state*)dsa; + + nv20->dirty |= NV20_NEW_DSA; +} + +static void +nv20_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv20_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + return draw_create_vertex_shader(nv20->draw, templ); +} + +static void +nv20_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_bind_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader); + + nv20->dirty |= NV20_NEW_VERTPROG; +} + +static void +nv20_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_delete_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv20_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv20_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv20_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(cso->tokens, &fp->info); + + return (void *)fp; +} + +static void +nv20_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_fragment_program *fp = hwcso; + + nv20->fragprog.current = fp; + nv20->dirty |= NV20_NEW_FRAGPROG; +} + +static void +nv20_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_fragment_program *fp = hwcso; + + nv20_fragprog_destroy(nv20, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv20_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->blend_color = (struct pipe_blend_color*)bcol; + + nv20->dirty |= NV20_NEW_BLENDCOL; +} + +static void +nv20_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_set_clip_state(nv20->draw, clip); +} + +static void +nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + if (buf) { + void *mapped; + if (buf->size && (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + { + memcpy(nv20->constbuf[shader], mapped, buf->size); + nv20->constbuf_nr[shader] = + buf->size / (4 * sizeof(float)); + ws->buffer_unmap(ws, buf->buffer); + } + } +} + +static void +nv20_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->framebuffer = (struct pipe_framebuffer_state*)fb; + + nv20->dirty |= NV20_NEW_FRAMEBUFFER; +} + +static void +nv20_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv20_set_scissor_state(struct pipe_context *pipe, + const struct 
pipe_scissor_state *s) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->scissor = (struct pipe_scissor_state*)s; + + nv20->dirty |= NV20_NEW_SCISSOR; +} + +static void +nv20_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->viewport = (struct pipe_viewport_state*)vpt; + + draw_set_viewport_state(nv20->draw, nv20->viewport); + + nv20->dirty |= NV20_NEW_VIEWPORT; +} + +static void +nv20_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + memcpy(nv20->vtxbuf, vb, sizeof(*vb) * count); + nv20->dirty |= NV20_NEW_VTXARRAYS; + + draw_set_vertex_buffers(nv20->draw, count, vb); +} + +static void +nv20_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + memcpy(nv20->vtxelt, ve, sizeof(*ve) * count); + nv20->dirty |= NV20_NEW_VTXARRAYS; + + draw_set_vertex_elements(nv20->draw, count, ve); +} + +void +nv20_init_state_functions(struct nv20_context *nv20) +{ + nv20->pipe.create_blend_state = nv20_blend_state_create; + nv20->pipe.bind_blend_state = nv20_blend_state_bind; + nv20->pipe.delete_blend_state = nv20_blend_state_delete; + + nv20->pipe.create_sampler_state = nv20_sampler_state_create; + nv20->pipe.bind_sampler_states = nv20_sampler_state_bind; + nv20->pipe.delete_sampler_state = nv20_sampler_state_delete; + nv20->pipe.set_sampler_textures = nv20_set_sampler_texture; + + nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create; + nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind; + nv20->pipe.delete_rasterizer_state = nv20_rasterizer_state_delete; + + nv20->pipe.create_depth_stencil_alpha_state = + nv20_depth_stencil_alpha_state_create; + nv20->pipe.bind_depth_stencil_alpha_state = + nv20_depth_stencil_alpha_state_bind; + nv20->pipe.delete_depth_stencil_alpha_state = + nv20_depth_stencil_alpha_state_delete; + + nv20->pipe.create_vs_state = nv20_vp_state_create; + nv20->pipe.bind_vs_state = nv20_vp_state_bind; + nv20->pipe.delete_vs_state = nv20_vp_state_delete; + + nv20->pipe.create_fs_state = nv20_fp_state_create; + nv20->pipe.bind_fs_state = nv20_fp_state_bind; + nv20->pipe.delete_fs_state = nv20_fp_state_delete; + + nv20->pipe.set_blend_color = nv20_set_blend_color; + nv20->pipe.set_clip_state = nv20_set_clip_state; + nv20->pipe.set_constant_buffer = nv20_set_constant_buffer; + nv20->pipe.set_framebuffer_state = nv20_set_framebuffer_state; + nv20->pipe.set_polygon_stipple = nv20_set_polygon_stipple; + nv20->pipe.set_scissor_state = nv20_set_scissor_state; + nv20->pipe.set_viewport_state = nv20_set_viewport_state; + + nv20->pipe.set_vertex_buffers = nv20_set_vertex_buffers; + nv20->pipe.set_vertex_elements = nv20_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h new file mode 100644 index 0000000000..34f402fdcb --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state.h @@ -0,0 +1,139 @@ +#ifndef __NV20_STATE_H__ +#define __NV20_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv20_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv20_sampler_state { + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv20_rasterizer_state { + uint32_t 
shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + + uint32_t point_size; + + uint32_t poly_smooth_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + + uint32_t point_sprite; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv20_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv20_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv20_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv20_vertex_program_exec *insns; + unsigned nr_insns; + struct nv20_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv20_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv20_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv20_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + +struct nv20_depth_stencil_alpha_state { + struct { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; + } depth; + + struct { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t vmask; + uint32_t fail; + uint32_t zfail; + uint32_t zpass; + } stencil; + + struct { + uint32_t enabled; + uint32_t func; + uint32_t ref; + } alpha; +}; + +struct nv20_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c new file mode 100644 index 0000000000..5265bf3a31 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -0,0 +1,359 @@ +#include "nv20_context.h" +#include "nv20_state.h" +#include "draw/draw_context.h" + +static void nv20_state_emit_blend(struct nv20_context* nv20) +{ + struct nv20_blend_state *b = nv20->blend; + + BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (b->d_enable); + + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (b->b_enable); + + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); + OUT_RING (b->b_srcfunc); + OUT_RING (b->b_dstfunc); + + BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (b->c_mask); +} + +static void nv20_state_emit_blend_color(struct nv20_context* nv20) +{ + struct pipe_blend_color *c = nv20->blend_color; + + BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + (float_to_ubyte(c->color[0]) << 16)| + (float_to_ubyte(c->color[1]) << 8) | + (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv20_state_emit_rast(struct nv20_context* nv20) +{ + struct nv20_rasterizer_state *r = nv20->rast; + + BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (r->shade_model); + OUT_RING (r->line_width); + + + BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (r->point_size); + + BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (r->poly_mode_front); + OUT_RING (r->poly_mode_back); + + + BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (r->cull_face); + OUT_RING 
(r->front_face); + + BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (r->line_smooth_en); + OUT_RING (r->poly_smooth_en); + + BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (r->cull_face_en); +} + +static void nv20_state_emit_dsa(struct nv20_context* nv20) +{ + struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + + BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (d->depth.func); + + BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (d->depth.write_enable); + + BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (d->depth.test_enable); + +#if 0 + BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (d->stencil.enable); + BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); +#endif + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (d->alpha.enabled); + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (d->alpha.func); + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (d->alpha.ref); +} + +static void nv20_state_emit_viewport(struct nv20_context* nv20) +{ +} + +static void nv20_state_emit_scissor(struct nv20_context* nv20) +{ + /* NV20TCL_SCISSOR_* is probably a software method */ +/* struct pipe_scissor_state *s = nv20->scissor; + BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv20_state_emit_framebuffer(struct nv20_context* nv20) +{ + struct pipe_framebuffer_state* fb = nv20->framebuffer; + struct pipe_surface *rt, *zeta = NULL; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + if (zeta) { + BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (zeta->stride << 16)); + } else { + BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (rt->stride << 16)); + } + + nv20->rt[0] = rt->buffer; + + if (zeta_format) + { + nv20->zeta = zeta->buffer; + } + + BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0); + OUT_RING (((h - 1) << 16) | 0); +} + +static void nv20_vertex_layout(struct nv20_context *nv20) +{ + struct nv20_fragment_program *fp = nv20->fragprog.current; + struct draw_context *dc = nv20->draw; + uint32_t src; + int i; + struct vertex_info *vinfo = &nv20->vertex_info; + const enum interp_mode colorInterp = INTERP_LINEAR; + boolean colors[2] = { FALSE }; + boolean generics[4] = { FALSE }; + boolean fog = FALSE; + + memset(vinfo, 0, sizeof(*vinfo)); + + /* + * NV10 hardware vertex attribute order: + * fog, weight, normal, tex1, tex0, 2nd color, color, position + * vinfo->hwfmt[0] has a used-bit corresponding to each of these. 
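	 * For example (illustrative only), a fragment program that reads
	 * position, colour 0 and one generic/texcoord leaves this function
	 * with:
	 *   vinfo->hwfmt[0] = (1 << 0) | (1 << 1) | (1 << 3);
	 *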
+ * relation to TGSI_SEMANTIC_*: + * - POSITION: position (always used) + * - COLOR: 2nd color, color + * - GENERIC: weight, normal, tex1, tex0 + * - FOG: fog + */ + + for (i = 0; i < fp->info.num_inputs; i++) { + int isn = fp->info.input_semantic_name[i]; + int isi = fp->info.input_semantic_index[i]; + switch (isn) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + assert(isi < 2); + colors[isi] = TRUE; + break; + case TGSI_SEMANTIC_GENERIC: + assert(isi < 4); + generics[isi] = TRUE; + break; + case TGSI_SEMANTIC_FOG: + fog = TRUE; + break; + default: + assert(0 && "unknown input_semantic_name"); + } + } + + if (fog) { + int src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << 7); + } + + for (i = 3; i >= 0; i--) { + int src; + if (!generics[i]) + continue; + src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << (i + 3)); + } + + if (colors[1]) { + int src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + vinfo->hwfmt[0] |= (1 << 2); + } + + if (colors[0]) { + int src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + vinfo->hwfmt[0] |= (1 << 1); + } + + /* always do position */ + src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo->hwfmt[0] |= (1 << 0); + + draw_compute_vertex_size(vinfo); +} + +void +nv20_emit_hw_state(struct nv20_context *nv20) +{ + int i; + + if (nv20->dirty & NV20_NEW_VERTPROG) { + //nv20_vertprog_bind(nv20, nv20->vertprog.current); + nv20->dirty &= ~NV20_NEW_VERTPROG; + } + + if (nv20->dirty & NV20_NEW_FRAGPROG) { + nv20_fragprog_bind(nv20, nv20->fragprog.current); + /*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */ + nv20->dirty_samplers |= (1<<10); + nv20->dirty_samplers = 0; + } + + if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) { + nv20_fragtex_bind(nv20); + nv20->dirty &= ~NV20_NEW_FRAGPROG; + } + + if (nv20->dirty & NV20_NEW_VTXARRAYS) { + nv20->dirty &= ~NV20_NEW_VTXARRAYS; + nv20_vertex_layout(nv20); + nv20_vtxbuf_bind(nv20); + } + + if (nv20->dirty & NV20_NEW_BLEND) { + nv20->dirty &= ~NV20_NEW_BLEND; + nv20_state_emit_blend(nv20); + } + + if (nv20->dirty & NV20_NEW_BLENDCOL) { + nv20->dirty &= ~NV20_NEW_BLENDCOL; + nv20_state_emit_blend_color(nv20); + } + + if (nv20->dirty & NV20_NEW_RAST) { + nv20->dirty &= ~NV20_NEW_RAST; + nv20_state_emit_rast(nv20); + } + + if (nv20->dirty & NV20_NEW_DSA) { + nv20->dirty &= ~NV20_NEW_DSA; + nv20_state_emit_dsa(nv20); + } + + if (nv20->dirty & NV20_NEW_VIEWPORT) { + nv20->dirty &= ~NV20_NEW_VIEWPORT; + nv20_state_emit_viewport(nv20); + } + + if (nv20->dirty & NV20_NEW_SCISSOR) { + nv20->dirty &= ~NV20_NEW_SCISSOR; + nv20_state_emit_scissor(nv20); + } + + if (nv20->dirty & NV20_NEW_FRAMEBUFFER) { + nv20->dirty &= ~NV20_NEW_FRAMEBUFFER; + nv20_state_emit_framebuffer(nv20); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. 
Vertex arrays are emitted by nv20_vbo.c + */ + + /* Render target */ +/* XXX figre out who's who for NV10TCL_DMA_* and fill accordingly + * BEGIN_RING(kelvin, NV20TCL_DMA_COLOR0, 1); + * OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); */ + BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + if (nv20->zeta) { +/* XXX + * BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); + * OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); */ + BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* XXX for when we allocate LMA on nv17 */ +/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(nv20->zeta + lma_offset);*/ + } + + /* Vertex buffer */ + BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + /* Texture images */ + for (i = 0; i < 2; i++) { + if (!(nv20->fp_samplers & (1 << i))) + continue; + BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | + NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, + NV20TCL_TX_FORMAT_DMA1); + } +} + diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c new file mode 100644 index 0000000000..7bc68d0ca2 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_surface.c @@ -0,0 +1,64 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv20_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv20_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nouveau_winsys *nvws = nv20->nvws; + + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); +} + +static void +nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nouveau_winsys *nvws = nv20->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv20_init_surface_functions(struct nv20_context *nv20) +{ + nv20->pipe.surface_copy = nv20_surface_copy; + nv20->pipe.surface_fill = nv20_surface_fill; +} diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c new file mode 100644 index 0000000000..4edc4efebd --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -0,0 +1,77 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv20_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv20_context *nv20 = nv20_context( pipe ); + struct draw_context *draw = nv20->draw; + unsigned i; + + nv20_emit_hw_state(nv20); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv20->vtxbuf[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv20->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(draw, + nv20->constbuf[PIPE_SHADER_VERTEX], + nv20->constbuf_nr[PIPE_SHADER_VERTEX]); + + /* draw! 
*/ + draw_arrays(nv20->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv20->vtxbuf[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv20_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv20_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c new file mode 100644 index 0000000000..a885fcd7a5 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -0,0 +1,838 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 2. Arb. swz/negation + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv20_shader.h" + +#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv20_sr_neg((s)) +#define abs(s) nv20_sr_abs((s)) + +struct nv20_vpc { + struct nv20_vertex_program *vp; + + struct nv20_vertex_program_exec *vpi; + + unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; + + struct nv20_sreg *imm; + unsigned nr_imm; +}; + +static struct nv20_sreg +temp(struct nv20_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp + 1; + return nv20_sr(NV30SR_TEMP, idx); +} + +static struct nv20_sreg +constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv20_vertex_program *vp = vpc->vp; + struct nv20_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv20_sr(NV30SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv20_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src) +{ + struct nv20_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_TEMP: + sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV30SR_INPUT: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); + break; + case NV30SR_CONST: + sr |= (NV30_VP_SRC_REG_TYPE_CONST << + NV30_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == 
src.index); + vpc->vpi->const_index = src.index; + break; + case NV30SR_NONE: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + * \u/ + * + */ + + switch (pos) { + case 0: + hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> + NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << + NV30_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> + NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << + NV30_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst) +{ + struct nv20_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV30SR_TEMP: + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + break; + case NV30SR_OUTPUT: + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. 
+ */ + hw[3] |= 0x800; + break; + default: + assert(0); + } +} + +static void +nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op, + struct nv20_sreg dst, int mask, + struct nv20_sreg s0, struct nv20_sreg s1, + struct nv20_sreg s2) +{ + struct nv20_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + + if (dst.type == NV30SR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv20_sreg +tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv20_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv20_sreg +tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv20_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = nv20_sr(NV30SR_OUTPUT, + vpc->output_map[fdst->DstRegister.Index]); + + break; + case TGSI_FILE_TEMPORARY: + dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index); + if (vpc->high_temp < dst.index) + vpc->high_temp = dst.index; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv20_sreg src[3], dst, tmp; + struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = 
&finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + /*XXX: index comparison is broken now that consts come from + * two different register files. + */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], 
none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV30_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv20_vertprog_prepare(struct nv20_vpc *vpc) +{ + struct tgsi_parse_context p; + int nr_imm = 0; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg)); + assert(vpc->imm); + } + + return TRUE; +} + +static void +nv20_vertprog_translate(struct nv20_context *nv20, + struct nv20_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv20_vpc *vpc = NULL; + + tgsi_dump(vp->pipe.tokens,0); + + vpc = CALLOC(1, sizeof(struct nv20_vpc)); + if (!vpc) + return; + vpc->vp = vp; + vpc->high_temp = -1; + + if (!nv20_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv20_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +// assert(imm->Immediate.Size == 4); + 
vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv20_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + FREE(vpc); +} + +static boolean +nv20_vertprog_validate(struct nv20_context *nv20) +{ + struct nouveau_winsys *nvws = nv20->nvws; + struct pipe_winsys *ws = nv20->pipe.winsys; + struct nouveau_grobj *rankine = nv20->screen->rankine; + struct nv20_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + vp = nv20->vertprog; + constbuf = nv20->constbuf[PIPE_SHADER_VERTEX]; + + /* Translate TGSI shader into hw bytecode */ + if (!vp->translated) { + nv20_vertprog_translate(nv20, vp); + if (!vp->translated) + return FALSE; + } + + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv20->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv20_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv20->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv20_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. 
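+	 * Concretely: constant reads get vp->data->start folded back into
+	 * their source index (second loop below), while relocated branch
+	 * offsets are not handled yet; the assert() in the first loop will
+	 * fire if a program with branches is ever moved.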
+ */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv20_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv20_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV30_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv20_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) { + ws->buffer_unmap(ws, constbuf); + } + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, vp->insns[i].data[0], vp->insns[i].data[1], + vp->insns[i].data[2], vp->insns[i].data[3]); + } +#endif + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) { + so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv20_vertprog_destroy(struct nv20_context *nv20, struct nv20_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv20->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = 0; + so_ref(NULL, &vp->so); +} + +struct nv20_state_entry nv20_state_vertprog = { + .validate = nv20_vertprog_validate, + .dirty = { + .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, + .hw = NV30_STATE_VERTPROG, + } +}; diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile new file mode 100644 index 0000000000..69f2790dfe --- /dev/null +++ b/src/gallium/drivers/nv30/Makefile @@ -0,0 +1,37 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv30 + +DRIVER_SOURCES = \ + nv30_clear.c \ + nv30_context.c \ + nv30_draw.c \ + nv30_fragprog.c \ + nv30_fragtex.c \ + nv30_miptree.c \ + nv30_query.c \ + nv30_screen.c \ + nv30_state.c \ + nv30_state_blend.c \ + nv30_state_emit.c \ + nv30_state_fb.c \ + nv30_state_rasterizer.c \ + nv30_state_scissor.c \ + nv30_state_stipple.c \ + nv30_state_viewport.c \ + nv30_state_zsa.c \ + nv30_surface.c \ + nv30_vbo.c \ + nv30_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c new file mode 100644 index 0000000000..8c3ca204d5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" + +void +nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c new file mode 100644 index 0000000000..2bff28aca9 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +static void +nv30_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(rankine, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(rankine, 0x1fd8, 1); + OUT_RING (1); + } + + FIRE_RING(fence); +} + +static void +nv30_destroy(struct pipe_context *pipe) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (nv30->draw) + draw_destroy(nv30->draw); + FREE(nv30); +} + +struct pipe_context * +nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv30_context *nv30; + struct nouveau_winsys *nvws = screen->nvws; + + nv30 = CALLOC(1, sizeof(struct nv30_context)); + if (!nv30) + return NULL; + nv30->screen = screen; + nv30->pctx_id = pctx_id; + + nv30->nvws = nvws; + + nv30->pipe.winsys = ws; + nv30->pipe.screen = pscreen; + nv30->pipe.destroy = nv30_destroy; + nv30->pipe.draw_arrays = nv30_draw_arrays; + nv30->pipe.draw_elements = nv30_draw_elements; + nv30->pipe.clear = nv30_clear; + nv30->pipe.flush = nv30_flush; + + nv30_init_query_functions(nv30); + nv30_init_surface_functions(nv30); + nv30_init_state_functions(nv30); + + /* Create, configure, and install fallback swtnl path */ + nv30->draw = draw_create(); + draw_wide_point_threshold(nv30->draw, 9999999.0); + draw_wide_line_threshold(nv30->draw, 9999999.0); + draw_enable_line_stipple(nv30->draw, FALSE); + draw_enable_point_sprites(nv30->draw, FALSE); + draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); + + return &nv30->pipe; +} + diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h new file mode 100644 index 0000000000..b933769700 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -0,0 +1,212 @@ +#ifndef __NV30_CONTEXT_H__ +#define __NV30_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" 
+#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv30_screen *ctx = nv30->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv30_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv30_state_index { + NV30_STATE_FB = 0, + NV30_STATE_VIEWPORT = 1, + NV30_STATE_BLEND = 2, + NV30_STATE_RAST = 3, + NV30_STATE_ZSA = 4, + NV30_STATE_BCOL = 5, + NV30_STATE_CLIP = 6, + NV30_STATE_SCISSOR = 7, + NV30_STATE_STIPPLE = 8, + NV30_STATE_FRAGPROG = 9, + NV30_STATE_VERTPROG = 10, + NV30_STATE_FRAGTEX0 = 11, + NV30_STATE_FRAGTEX1 = 12, + NV30_STATE_FRAGTEX2 = 13, + NV30_STATE_FRAGTEX3 = 14, + NV30_STATE_FRAGTEX4 = 15, + NV30_STATE_FRAGTEX5 = 16, + NV30_STATE_FRAGTEX6 = 17, + NV30_STATE_FRAGTEX7 = 18, + NV30_STATE_FRAGTEX8 = 19, + NV30_STATE_FRAGTEX9 = 20, + NV30_STATE_FRAGTEX10 = 21, + NV30_STATE_FRAGTEX11 = 22, + NV30_STATE_FRAGTEX12 = 23, + NV30_STATE_FRAGTEX13 = 24, + NV30_STATE_FRAGTEX14 = 25, + NV30_STATE_FRAGTEX15 = 26, + NV30_STATE_VERTTEX0 = 27, + NV30_STATE_VERTTEX1 = 28, + NV30_STATE_VERTTEX2 = 29, + NV30_STATE_VERTTEX3 = 30, + NV30_STATE_VTXBUF = 31, + NV30_STATE_VTXFMT = 32, + NV30_STATE_VTXATTR = 33, + NV30_STATE_MAX = 34 +}; + +#include "nv30_screen.h" + +#define NV30_NEW_BLEND (1 << 0) +#define NV30_NEW_RAST (1 << 1) +#define NV30_NEW_ZSA (1 << 2) +#define NV30_NEW_SAMPLER (1 << 3) +#define NV30_NEW_FB (1 << 4) +#define NV30_NEW_STIPPLE (1 << 5) +#define NV30_NEW_SCISSOR (1 << 6) +#define NV30_NEW_VIEWPORT (1 << 7) +#define NV30_NEW_BCOL (1 << 8) +#define NV30_NEW_VERTPROG (1 << 9) +#define NV30_NEW_FRAGPROG (1 << 10) +#define NV30_NEW_ARRAYS (1 << 11) +#define NV30_NEW_UCP (1 << 12) + +struct nv30_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv30_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv30_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nv30_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned viewport_bypass; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NV30_STATE_MAX]; +}; + +struct nv30_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv30_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nv30_state state; + + /* Context state */ + unsigned dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct nv30_vertex_program *vertprog; + struct nv30_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nv30_rasterizer_state *rasterizer; + struct nv30_zsa_state *zsa; + struct nv30_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer 
vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + const unsigned *edgeflags; +}; + +static INLINE struct nv30_context * +nv30_context(struct pipe_context *pipe) +{ + return (struct nv30_context *)pipe; +} + +struct nv30_state_entry { + boolean (*validate)(struct nv30_context *nv30); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern void nv30_init_state_functions(struct nv30_context *nv30); +extern void nv30_init_surface_functions(struct nv30_context *nv30); +extern void nv30_init_query_functions(struct nv30_context *nv30); + +extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv30_draw.c */ +extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); + +/* nv30_vertprog.c */ +extern void nv30_vertprog_destroy(struct nv30_context *, + struct nv30_vertex_program *); + +/* nv30_fragprog.c */ +extern void nv30_fragprog_destroy(struct nv30_context *, + struct nv30_fragment_program *); + +/* nv30_fragtex.c */ +extern void nv30_fragtex_bind(struct nv30_context *); + +/* nv30_state.c and friends */ +extern boolean nv30_state_validate(struct nv30_context *nv30); +extern void nv30_state_emit(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_rasterizer; +extern struct nv30_state_entry nv30_state_scissor; +extern struct nv30_state_entry nv30_state_stipple; +extern struct nv30_state_entry nv30_state_fragprog; +extern struct nv30_state_entry nv30_state_vertprog; +extern struct nv30_state_entry nv30_state_blend; +extern struct nv30_state_entry nv30_state_blend_colour; +extern struct nv30_state_entry nv30_state_zsa; +extern struct nv30_state_entry nv30_state_viewport; +extern struct nv30_state_entry nv30_state_framebuffer; +extern struct nv30_state_entry nv30_state_fragtex; +extern struct nv30_state_entry nv30_state_vbo; + +/* nv30_vbo.c */ +extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv30_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv30_clear.c */ +extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c new file mode 100644 index 0000000000..74fc138c05 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -0,0 +1,61 @@ +#include "draw/draw_pipe.h" + +#include "nv30_context.h" + +struct nv30_draw_stage { + struct draw_stage draw; + struct nv30_context *nv30; +}; + +static void +nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_flush(struct draw_stage *draw, unsigned flags) +{ +} + +static void +nv30_draw_reset_stipple_counter(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +struct draw_stage * +nv30_draw_render_stage(struct nv30_context *nv30) +{ + struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); + + nv30draw->nv30 = nv30; + nv30draw->draw.draw = nv30->draw; + nv30draw->draw.point = nv30_draw_point; + nv30draw->draw.line = 
nv30_draw_line; + nv30draw->draw.tri = nv30_draw_tri; + nv30draw->draw.flush = nv30_draw_flush; + nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; + nv30draw->draw.destroy = nv30_draw_destroy; + + return &nv30draw->draw; +} + diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c new file mode 100644 index 0000000000..320ba3f4bf --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -0,0 +1,911 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv30_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV30_FP_OP_COND_TR +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) +#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv30_fpc { + struct nv30_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + int high_temp; + int temp_temp_count; + int num_regs; + + uint depth_id; + uint colour_id; + + unsigned inst_offset; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nv30_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv30_sreg +temp(struct nv30_fpc *fpc) +{ + int idx; + + idx = fpc->temp_temp_count++; + idx += fpc->high_temp + 1; + return nv30_sr(NV30SR_TEMP, idx); +} + +static INLINE struct nv30_sreg +constant(struct nv30_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv30_fpc *fpc, int size) +{ + struct nv30_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_INPUT: + sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); + break; + case NV30SR_OUTPUT: + sr |= NV30_FP_REG_SRC_HALF; + /* fall-through */ + case NV30SR_TEMP: + sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV30_FP_REG_SRC_SHIFT); + break; + case NV30SR_CONST: + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nv30_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + 
memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + break; + case NV30SR_NONE: + sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV30SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NV30SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NV30_FP_OP_OUT_REG_HALF; + } + break; + case NV30SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); +} + +static void +nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV30_FP_OP_OPCODE_KIL) + fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; + hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV30_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ + struct nv30_fragment_program *fp = fpc->fp; + + nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv30_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv30_sr(NV30SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + if (fpc->high_temp < src.index) + fpc->high_temp = src.index; + break; + /* This is clearly insane, but gallium hands us shaders like this. + * Luckily fragprog results are just temp regs.. 
+ */ + case TGSI_FILE_OUTPUT: + if (fsrc->SrcRegister.Index == fpc->colour_id) + return nv30_sr(NV30SR_OUTPUT, 0); + else + return nv30_sr(NV30SR_OUTPUT, 1); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + int idx; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + if (fdst->DstRegister.Index == fpc->colour_id) + return nv30_sr(NV30SR_OUTPUT, 0); + else + return nv30_sr(NV30SR_OUTPUT, 1); + break; + case TGSI_FILE_TEMPORARY: + idx = fdst->DstRegister.Index + 1; + if (fpc->high_temp < idx) + fpc->high_temp = idx; + return nv30_sr(NV30SR_TEMP, idx); + case TGSI_FILE_NULL: + return nv30_sr(NV30SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv30_sr(NV30SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv30_sreg *src) +{ + const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv30_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nv30_sreg src[3], dst, tmp; + int mask, sat, unit = 0; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + fpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register 
*fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + NOUVEAU_MSG("extra src attr %d\n", + fsrc->SrcRegister.Index); + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KILP: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KIL: + dst = nv30_sr(NV30SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case 
TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; + case TGSI_OPCODE_POW: + arith(fpc, sat, POW, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_RSQ: + arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
+ SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + fpc->depth_id = fdec->DeclarationRange.First; + break; + case TGSI_SEMANTIC_COLOR: + fpc->colour_id = fdec->DeclarationRange.First; + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_fragprog_prepare(struct nv30_fpc *fpc) +{ + struct tgsi_parse_context p; + /*int high_temp = -1, i;*/ + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv30_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + /*case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break;*/ + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + /*if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + }*/ + + return TRUE; + +out_err: + /*if (fpc->r_temp) + FREE(fpc->r_temp);*/ + tgsi_parse_free(&p); + return FALSE; +} + +static void +nv30_fragprog_translate(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv30_fpc *fpc = NULL; + + tgsi_dump(fp->pipe.tokens,0); + + fpc = CALLOC(1, sizeof(struct nv30_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->high_temp = -1; + fpc->num_regs = 2; + + if (!nv30_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv30_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= (fpc->num_regs-1)/2; + fp->fp_reg_control = (1<<16)|0x4; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. 
*/ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; + fp->on_hw = FALSE; +out_err: + tgsi_parse_free(&parse); + FREE(fpc); +} + +static void +nv30_fragprog_upload(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv30_fragprog_validate(struct nv30_context *nv30) +{ + struct nv30_fragment_program *fp = nv30->fragprog; + struct pipe_buffer *constbuf = + nv30->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; + int i; + + if (fp->translated) + goto update_constants; + + /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ + nv30_fragprog_translate(nv30, fp); + if (!fp->translated) { + /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ + return FALSE; + } + + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv30_fragprog_upload(nv30, fp); + + so = so_new(8, 1); + so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, fp->fp_reg_control); + so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); + so_data (so, fp->samplers); + so_ref(so, &fp->so); + +update_constants: + if (fp->nr_consts) { + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv30_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + ws->buffer_unmap(ws, constbuf); + + if (new_consts) + nv30_fragprog_upload(nv30, fp); + } + + if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { + so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv30_fragprog_destroy(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + if (fp->insn_len) + FREE(fp->insn); +} + +struct nv30_state_entry nv30_state_fragprog = { + .validate = nv30_fragprog_validate, + .dirty = { + .pipe = NV30_NEW_FRAGPROG, + .hw = NV30_STATE_FRAGPROG + } +}; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c new file mode 100644 index 0000000000..b1d2663af3 --- /dev/null +++ 
b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -0,0 +1,163 @@ +#include "nv30_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV34TCL_TX_FORMAT_FORMAT_##tf, \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \ +} + +struct nv30_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; +}; + +static struct nv30_texture_format +nv30_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), +// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), +// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), + {}, +}; + +static struct nv30_texture_format * +nv30_fragtex_format(uint pipe_format) +{ + struct nv30_texture_format *tf = nv30_texture_formats; + char fs[128]; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return NULL; +} + + +static struct nouveau_stateobj * +nv30_fragtex_build(struct nv30_context *nv30, int unit) +{ + struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; + struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; + struct pipe_texture *pt = &nv30mt->base; + struct nv30_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs , txp; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv30_fragtex_format(pt->format); + if (!tf) + assert(0); + + txf = tf->format; + txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV34TCL_TX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV34TCL_TX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV34TCL_TX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV34TCL_TX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + txp = 0; + } else { + txp = nv30mt->level[0].pitch; + txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? 
*/; + } + + txs = tf->swizzle; + + so = so_new(16, 2); + so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); + so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + + return so; +} + +static boolean +nv30_fragtex_validate(struct nv30_context *nv30) +{ + struct nv30_fragment_program *fp = nv30->fragprog; + struct nv30_state *state = &nv30->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(2, 0); + so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); + } + + samplers = nv30->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = nv30_fragtex_build(nv30, unit); + so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); + } + + nv30->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nv30_state_entry nv30_state_fragtex = { + .validate = nv30_fragtex_validate, + .dirty = { + .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c new file mode 100644 index 0000000000..37d297cc0f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -0,0 +1,196 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv30_context.h" + +static void +nv30_miptree_layout(struct nv30_miptree *nv30mt) +{ + struct pipe_texture *pt = &nv30mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f, pitch; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + pitch = pt->width[0]; + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (swizzled) + pitch = pt->nblocksx[l]; + pitch = align(pitch, 64); + + nv30mt->level[l].pitch = pitch * pt->block.size; + nv30mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv30mt->level[l].image_offset[f] = offset; + offset += nv30mt->level[l].pitch * pt->height[l]; + } + } + + nv30mt->total_size = offset; +} + +static struct pipe_texture * +nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv30_miptree *mt; + + mt = MALLOC(sizeof(struct nv30_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + 
mt->shadow_tex = NULL; + mt->shadow_surface = NULL; + + /* Swizzled textures must be POT */ + if (pt->width[0] & (pt->width[0] - 1) || + pt->height[0] & (pt->height[0] - 1)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R16_SNORM: + break; + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + nv30_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, + PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE, + mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + struct nv30_miptree *mt = (struct nv30_miptree *)pt; + int l; + + *ppt = NULL; + if (--pt->refcount) + return; + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + if (mt->shadow_tex) { + assert(mt->shadow_surface); + pscreen->tex_surface_release(pscreen, &mt->shadow_surface); + nv30_miptree_release(pscreen, &mt->shadow_tex); + } + + FREE(mt); +} + +static struct pipe_surface * +nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(pscreen, &ps->buffer, nv30mt->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv30mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->winsys = pscreen->winsys; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv30mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv30mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv30mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv30_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); + FREE(ps); +} + +void +nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv30_miptree_create; + pscreen->texture_release = nv30_miptree_release; + pscreen->get_tex_surface = nv30_miptree_surface_new; + pscreen->tex_surface_release = nv30_miptree_surface_del; +} diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c new file mode 100644 index 0000000000..2f974cf5c4 --- /dev/null +++ 
b/src/gallium/drivers/nv30/nv30_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv30_context.h" + +struct nv30_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv30_query * +nv30_query(struct pipe_query *pipe) +{ + return (struct nv30_query *)pipe; +} + +static struct pipe_query * +nv30_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv30_query *q; + + q = CALLOC(1, sizeof(struct nv30_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + if (q->object) + nv30->nvws->res_free(&q->object); + FREE(q); +} + +static void +nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nv30->nvws->notifier_reset(nv30->screen->query, q->object->start); + + BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(NULL); +} + +static boolean +nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + struct nouveau_winsys *nvws = nv30->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv30->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv30->screen->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = nvws->notifier_retval(nv30->screen->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv30_init_query_functions(struct nv30_context *nv30) +{ + nv30->pipe.create_query = nv30_query_create; + nv30->pipe.destroy_query = nv30_query_destroy; + nv30->pipe.begin_query = nv30_query_begin; + nv30->pipe.end_query = nv30_query_end; + nv30->pipe.get_query_result = nv30_query_result; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c new file mode 100644 index 0000000000..29356e8c1e --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -0,0 +1,385 @@ +#include "pipe/p_screen.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define 
NV35TCL_CHIPSET_3X_MASK 0x000001e0 + +static const char * +nv30_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv30_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv30_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 1; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 8.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv30_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_map; + void *map; + + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; + + if (!mt->shadow_tex) { + unsigned old_tex_usage = surface->texture->tex_usage; + surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; + mt->shadow_tex = screen->texture_create(screen, surface->texture); + surface->texture->tex_usage = old_tex_usage; + + assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR); + mt->shadow_surface = screen->get_tex_surface + ( + screen, mt->shadow_tex, + surface->face, 
surface->level, surface->zslice, + surface->usage + ); + } + + surface_to_map = mt->shadow_surface; + } + else + surface_to_map = surface; + + assert(surface_to_map); + + map = ws->buffer_map(ws, surface_to_map->buffer, flags); + if (!map) + return NULL; + + return map + surface_to_map->offset; +} + +static void +nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_unmap; + + /* TODO: Copy from shadow just before push buffer is flushed instead. + There are probably some programs that map/unmap excessively + before rendering. */ + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; + + assert(mt->shadow_tex); + + surface_to_unmap = mt->shadow_surface; + } + else + surface_to_unmap = surface; + + assert(surface_to_unmap); + + ws->buffer_unmap(ws, surface_to_unmap->buffer); + + if (surface_to_unmap != surface) { + struct nv30_screen *nvscreen = nv30_screen(screen); + + nvscreen->nvws->surface_copy(nvscreen->nvws, + surface, 0, 0, + surface_to_unmap, 0, 0, + surface->width, surface->height); + } +} + +static void +nv30_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->rankine); + + FREE(pscreen); +} + +struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); + struct nouveau_stateobj *so; + unsigned rankine_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret, i; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x30: + if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0397; + else + if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0697; + else + if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0497; + break; + default: + break; + } + + if (!rankine_class) { + NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, rankine_class, &screen->rankine); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 256) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static rankine initialisation */ + so = so_new(128, 0); + so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); + so_data (so, 
screen->sync->handle); + so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); +/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle);*/ + so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); + so_data (so, nvws->channel->vram->handle); + + for (i=1; i<8; i++) { + so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + so_data (so, 0); + so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + so_data (so, 0); + } + + so_method(so, screen->rankine, 0x220, 1); + so_data (so, 1); + + so_method(so, screen->rankine, 0x03b0, 1); + so_data (so, 0x00100000); + so_method(so, screen->rankine, 0x1454, 1); + so_data (so, 0); + so_method(so, screen->rankine, 0x1d80, 1); + so_data (so, 3); + so_method(so, screen->rankine, 0x1450, 1); + so_data (so, 0x00030004); + + /* NEW */ + so_method(so, screen->rankine, 0x1e98, 1); + so_data (so, 0); + so_method(so, screen->rankine, 0x17e0, 3); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_method(so, screen->rankine, 0x1f80, 16); + for (i=0; i<16; i++) { + so_data (so, (i==8) ? 0x0000ffff : 0); + } + + so_method(so, screen->rankine, 0x120, 3); + so_data (so, 0); + so_data (so, 1); + so_data (so, 2); + + so_method(so, screen->rankine, 0x1d88, 1); + so_data (so, 0x00001200); + + so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); + so_data (so, 0); + + so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); + so_data (so, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ + so_method(so, screen->rankine, 0x1e94, 1); + so_data (so, 0x13); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv30_screen_destroy; + + screen->pipe.get_name = nv30_screen_get_name; + screen->pipe.get_vendor = nv30_screen_get_vendor; + screen->pipe.get_param = nv30_screen_get_param; + screen->pipe.get_paramf = nv30_screen_get_paramf; + + screen->pipe.is_format_supported = nv30_screen_surface_format_supported; + + screen->pipe.surface_map = nv30_surface_map; + screen->pipe.surface_unmap = nv30_surface_unmap; + + nv30_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h new file mode 100644 index 0000000000..b7ddc2a959 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -0,0 +1,35 @@ +#ifndef __NV30_SCREEN_H__ +#define __NV30_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv30_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + /* HW graphics objects */ + struct nouveau_grobj *rankine; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + 
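+	/* Sub-allocator handing out slots in the 32-entry query notifier
+	 * above (assumption based on res_init(&screen->query_heap, 0, 32)
+	 * in nv30_screen_create()) */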
struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV30_STATE_MAX]; +}; + +static INLINE struct nv30_screen * +nv30_screen(struct pipe_screen *screen) +{ + return (struct nv30_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h new file mode 100644 index 0000000000..dd3a36f78f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_shader.h @@ -0,0 +1,490 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. 
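+ *
+ * As a rough, unverified sketch of how the fields below combine, a
+ * "MOV oCOL0, vPOS" style instruction could be assembled as:
+ *
+ *   uint32_t hw[4] = { 0, 0, 0, 0 };
+ *   hw[0] |= NV30_VP_INST_VEC_RESULT;
+ *   hw[1] |= NV30_VP_INST_OP_MOV << NV30_VP_INST_VEC_OPCODE_SHIFT;
+ *   hw[1] |= NV30_VP_INST_IN_POS << NV30_VP_INST_INPUT_SRC_SHIFT;
+ *   hw[3] |= 0xf << NV30_VP_INST_VDEST_WRITEMASK_SHIFT;
+ *   hw[3] |= NV30_VP_INST_DEST_COL0 << NV30_VP_INST_DEST_SHIFT;
+ *
+ * The source-register selector (type, register id, swizzle, negate; see
+ * the NV30_VP_SRC_* defines) is built separately and then split across
+ * dwords 1 and 2 using the NV30_VP_SRC0_HIGH/LOW masks; that step is
+ * omitted here.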
+ * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +# define NV30_VP_INST_COND_FL 0 /* guess */ +# define NV30_VP_INST_COND_LT 1 +# define NV30_VP_INST_COND_EQ 2 +# define NV30_VP_INST_COND_LE 3 +# define NV30_VP_INST_COND_GT 4 +# define NV30_VP_INST_COND_NE 5 +# define NV30_VP_INST_COND_GE 6 +# define NV30_VP_INST_COND_TR 7 /* guess */ +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +# define NV30_VP_INST_OP_NOP 0x00 +# define NV30_VP_INST_OP_RCP 0x02 +# define NV30_VP_INST_OP_RCC 0x03 +# define NV30_VP_INST_OP_RSQ 0x04 +# define NV30_VP_INST_OP_EXP 0x05 +# define NV30_VP_INST_OP_LOG 0x06 +# define NV30_VP_INST_OP_LIT 0x07 +# define NV30_VP_INST_OP_BRA 0x09 +# define NV30_VP_INST_OP_CAL 0x0B +# define NV30_VP_INST_OP_RET 0x0C +# define NV30_VP_INST_OP_LG2 0x0D +# define NV30_VP_INST_OP_EX2 0x0E +# define NV30_VP_INST_OP_SIN 0x0F +# define NV30_VP_INST_OP_COS 0x10 +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +# define NV30_VP_INST_OP_NOPV 0x00 +# define NV30_VP_INST_OP_MOV 0x01 +# define NV30_VP_INST_OP_MUL 0x02 +# define NV30_VP_INST_OP_ADD 0x03 +# define NV30_VP_INST_OP_MAD 0x04 +# define NV30_VP_INST_OP_DP3 0x05 +# define NV30_VP_INST_OP_DP4 0x07 +# define NV30_VP_INST_OP_DPH 0x06 +# define NV30_VP_INST_OP_DST 0x08 +# define NV30_VP_INST_OP_MIN 0x09 +# define NV30_VP_INST_OP_MAX 0x0A +# define NV30_VP_INST_OP_SLT 0x0B +# define NV30_VP_INST_OP_SGE 0x0C +# define NV30_VP_INST_OP_ARL 0x0D +# define NV30_VP_INST_OP_FRC 0x0E +# define NV30_VP_INST_OP_FLR 0x0F +# define NV30_VP_INST_OP_SEQ 0x10 +# define NV30_VP_INST_OP_SFL 0x11 +# define NV30_VP_INST_OP_SGT 0x12 +# define NV30_VP_INST_OP_SLE 0x13 +# define NV30_VP_INST_OP_SNE 0x14 +# define NV30_VP_INST_OP_STR 0x15 +# define NV30_VP_INST_OP_SSG 0x16 +# define NV30_VP_INST_OP_ARR 0x17 +# define NV30_VP_INST_OP_ARA 0x18 +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +# define NV30_VP_INST_IN_NORMAL 2 +# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm 
them all though */ +# define NV30_VP_INST_IN_COL1 4 +# define NV30_VP_INST_IN_FOGC 5 +# define NV30_VP_INST_IN_TC0 8 +# define NV30_VP_INST_IN_TC(n) (8+n) +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +#define NV30_VP_INST_LAST (1 << 0) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. 
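+ *
+ * (Presumably this is why the driver records each such constant slot as
+ * an (offset, index) pair -- see struct nv30_fragment_program_data in
+ * nv30_state.h -- so the dwords following the instruction can be patched
+ * with the current constant-buffer values before the program is uploaded.)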
+ * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + */ + +//== Opcode / Destination selection == +#define NV30_FP_OP_PROGRAM_END (1 << 0) +#define NV30_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +/* Needs to be set when writing outputs to get expected result.. */ +#define NV30_FP_OP_OUT_REG_HALF (1 << 7) +#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV30_FP_OP_OUTMASK_SHIFT 9 +#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV30_FP_OP_OUT_X (1<<9) +# define NV30_FP_OP_OUT_Y (1<<10) +# define NV30_FP_OP_OUT_Z (1<<11) +# define NV30_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV30_FP_OP_INPUT_SRC_SHIFT 13 +#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV30_FP_OP_INPUT_SRC_COL0 0x1 +# define NV30_FP_OP_INPUT_SRC_COL1 0x2 +# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV30_FP_OP_INPUT_SRC_TC0 0x4 +# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +#define NV30_FP_OP_TEX_UNIT_SHIFT 17 +#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NV30_FP_OP_PRECISION_SHIFT 22 +#define NV30_FP_OP_PRECISION_MASK (3 << 22) +# define NV30_FP_PRECISION_FP32 0 +# define NV30_FP_PRECISION_FP16 1 +# define NV30_FP_PRECISION_FX12 2 +#define NV30_FP_OP_OPCODE_SHIFT 24 +#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV30_FP_OP_OPCODE_NOP 0x00 +# define NV30_FP_OP_OPCODE_MOV 0x01 +# define NV30_FP_OP_OPCODE_MUL 0x02 +# define NV30_FP_OP_OPCODE_ADD 0x03 +# define NV30_FP_OP_OPCODE_MAD 0x04 +# define NV30_FP_OP_OPCODE_DP3 0x05 +# define NV30_FP_OP_OPCODE_DP4 0x06 +# define NV30_FP_OP_OPCODE_DST 0x07 +# define NV30_FP_OP_OPCODE_MIN 0x08 +# define NV30_FP_OP_OPCODE_MAX 0x09 +# define NV30_FP_OP_OPCODE_SLT 0x0A +# define NV30_FP_OP_OPCODE_SGE 0x0B +# define NV30_FP_OP_OPCODE_SLE 0x0C +# define NV30_FP_OP_OPCODE_SGT 0x0D +# define NV30_FP_OP_OPCODE_SNE 0x0E +# define NV30_FP_OP_OPCODE_SEQ 0x0F +# define NV30_FP_OP_OPCODE_FRC 0x10 +# define NV30_FP_OP_OPCODE_FLR 0x11 +# define NV30_FP_OP_OPCODE_KIL 0x12 +# define NV30_FP_OP_OPCODE_PK4B 0x13 +# define NV30_FP_OP_OPCODE_UP4B 0x14 +# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +# define NV30_FP_OP_OPCODE_TEX 0x17 +# define NV30_FP_OP_OPCODE_TXP 0x18 +# define NV30_FP_OP_OPCODE_TXD 0x19 +# define NV30_FP_OP_OPCODE_RCP 0x1A +# define NV30_FP_OP_OPCODE_RSQ 0x1B +# define 
NV30_FP_OP_OPCODE_EX2 0x1C +# define NV30_FP_OP_OPCODE_LG2 0x1D +# define NV30_FP_OP_OPCODE_LIT 0x1E +# define NV30_FP_OP_OPCODE_LRP 0x1F +# define NV30_FP_OP_OPCODE_STR 0x20 +# define NV30_FP_OP_OPCODE_SFL 0x21 +# define NV30_FP_OP_OPCODE_COS 0x22 +# define NV30_FP_OP_OPCODE_SIN 0x23 +# define NV30_FP_OP_OPCODE_PK2H 0x24 +# define NV30_FP_OP_OPCODE_UP2H 0x25 +# define NV30_FP_OP_OPCODE_POW 0x26 +# define NV30_FP_OP_OPCODE_PK4UB 0x27 +# define NV30_FP_OP_OPCODE_UP4UB 0x28 +# define NV30_FP_OP_OPCODE_PK2US 0x29 +# define NV30_FP_OP_OPCODE_UP2US 0x2A +# define NV30_FP_OP_OPCODE_DP2A 0x2E +# define NV30_FP_OP_OPCODE_TXB 0x31 +# define NV30_FP_OP_OPCODE_RFL 0x36 +# define NV30_FP_OP_OPCODE_DIV 0x3A +#define NV30_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV30_FP_OP_OUT_ABS (1 << 29) +#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV30_FP_OP_COND_SHIFT 18 +#define NV30_FP_OP_COND_MASK (0x07 << 18) +# define NV30_FP_OP_COND_FL 0 +# define NV30_FP_OP_COND_LT 1 +# define NV30_FP_OP_COND_EQ 2 +# define NV30_FP_OP_COND_LE 3 +# define NV30_FP_OP_COND_GT 4 +# define NV30_FP_OP_COND_NE 5 +# define NV30_FP_OP_COND_GE 6 +# define NV30_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV30_FP_OP_DST_SCALE_SHIFT 28 +#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV30_FP_OP_DST_SCALE_1X 0 +#define NV30_FP_OP_DST_SCALE_2X 1 +#define NV30_FP_OP_DST_SCALE_4X 2 +#define NV30_FP_OP_DST_SCALE_8X 3 +#define NV30_FP_OP_DST_SCALE_INV_2X 5 +#define NV30_FP_OP_DST_SCALE_INV_4X 6 +#define NV30_FP_OP_DST_SCALE_INV_8X 7 + + +/* high order bits of SRC2 */ +#define NV30_FP_OP_INDEX_INPUT (1 << 30) + +//== Register selection == +#define NV30_FP_REG_TYPE_SHIFT 0 +#define NV30_FP_REG_TYPE_MASK (3 << 0) +# define NV30_FP_REG_TYPE_TEMP 0 +# define NV30_FP_REG_TYPE_INPUT 1 +# define NV30_FP_REG_TYPE_CONST 2 +#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV30_FP_REG_SRC_HALF (1 << 8) +#define NV30_FP_REG_SWZ_ALL_SHIFT 9 +#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV30_FP_REG_SWZ_X_SHIFT 9 +#define NV30_FP_REG_SWZ_X_MASK (3 << 9) +#define NV30_FP_REG_SWZ_Y_SHIFT 11 +#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV30_FP_REG_SWZ_Z_SHIFT 13 +#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV30_FP_REG_SWZ_W_SHIFT 15 +#define NV30_FP_REG_SWZ_W_MASK (3 << 15) +# define NV30_FP_SWIZZLE_X 0 +# define NV30_FP_SWIZZLE_Y 1 +# define NV30_FP_SWIZZLE_Z 2 +# define NV30_FP_SWIZZLE_W 3 +#define NV30_FP_REG_NEGATE (1 << 17) + +#define NV30SR_NONE 0 +#define NV30SR_OUTPUT 1 +#define NV30SR_INPUT 2 +#define NV30SR_TEMP 3 +#define NV30SR_CONST 4 + +struct nv30_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv30_sreg +nv30_sr(int type, int index) +{ + struct nv30_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, 
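+		/* DEF_SCALE and DEF_CTEST are not defined in this header; they
+		 * are assumed to be #defined by the including translation unit. */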
+ }; + return temp; +} + +static INLINE struct nv30_sreg +nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) +{ + struct nv30_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv30_sreg +nv30_sr_neg(struct nv30_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv30_sreg +nv30_sr_abs(struct nv30_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv30_sreg +nv30_sr_scale(struct nv30_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c new file mode 100644 index 0000000000..fc66075c83 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -0,0 +1,724 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +static void * +nv30_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | + nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, rankine, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + if (cso->logicop_enable) { + so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 
1 : 0); + + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->blend = hwcso; + nv30->dirty |= NV30_NEW_BLEND; +} + +static void +nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; +/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; + break;*/ + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static void * +nv30_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv30_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv30_sampler_state)); + + ps->fmt = 0; + /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format + are the tx format to use. We should store normalized coord flag + in sampler state structure, and set appropriate format in + nvxx_fragtex_build() + */ + /*NV34TCL_TX_FORMAT_RECT*/ + /*if (!cso->normalized_coords) { + ps->fmt |= (1<<14) ; + }*/ + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); + + ps->en = 0; + + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 
256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + } + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv30->tex_sampler[unit] = sampler[unit]; + nv30->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv30->nr_samplers; unit++) { + nv30->tex_sampler[unit] = NULL; + nv30->dirty_samplers |= (1 << unit); + } + + nv30->nr_samplers = nr; + nv30->dirty |= NV30_NEW_SAMPLER; +} + +static void +nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv30->tex_miptree[unit], miptree[unit]); + nv30->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv30->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv30->tex_miptree[unit], NULL); + nv30->dirty_samplers |= (1 << unit); + } + + nv30->nr_textures = nr; + nv30->dirty |= NV30_NEW_SAMPLER; +} + +static void * +nv30_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *rankine = nv30->screen->rankine; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, rankine, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); + + so_method(so, rankine, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 
1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, rankine, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, rankine, NV34TCL_POINT_SPRITE, 1); + if (cso->point_sprite) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->rasterizer = hwcso; + nv30->dirty |= NV30_NEW_RAST; + /*nv30->draw_dirty |= NV30_NEW_RAST;*/ +} + +static void +nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *rankine = nv30->screen->rankine; + + so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 
1 : 0); + + so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref)); + + if (cso->stencil[0].enabled) { + so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->zsa = hwcso; + nv30->dirty |= NV30_NEW_ZSA; +} + +static void +nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nv30_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + /*struct nv30_context *nv30 = nv30_context(pipe);*/ + struct nv30_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv30_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ + + return (void *)vp; +} + +static void +nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->vertprog = hwcso; + nv30->dirty |= NV30_NEW_VERTPROG; + /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/ +} + +static void +nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_vertex_program *vp = hwcso; + + /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ + nv30_vertprog_destroy(nv30, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nv30_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv30_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv30_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->fragprog = hwcso; + nv30->dirty |= NV30_NEW_FRAGPROG; +} + +static void +nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_fragment_program *fp = hwcso; + + nv30_fragprog_destroy(nv30, fp); 
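+	/* the create hook duplicated the TGSI tokens with tgsi_dup_tokens(),
+	 * so release our copy here */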
+ FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv30_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->blend_colour = *bcol; + nv30->dirty |= NV30_NEW_BCOL; +} + +static void +nv30_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->constbuf[shader] = buf->buffer; + nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nv30->dirty |= NV30_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv30->dirty |= NV30_NEW_FRAGPROG; + } +} + +static void +nv30_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->framebuffer = *fb; + nv30->dirty |= NV30_NEW_FB; +} + +static void +nv30_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->stipple, stipple->stipple, 4 * 32); + nv30->dirty |= NV30_NEW_STIPPLE; +} + +static void +nv30_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->scissor = *s; + nv30->dirty |= NV30_NEW_SCISSOR; +} + +static void +nv30_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->viewport = *vpt; + nv30->dirty |= NV30_NEW_VIEWPORT; + /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/ +} + +static void +nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); + nv30->vtxbuf_nr = count; + + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +static void +nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->vtxelt, ve, sizeof(*ve) * count); + nv30->vtxelt_nr = count; + + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +static void +nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->edgeflags = bitfield; + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +void +nv30_init_state_functions(struct nv30_context *nv30) +{ + nv30->pipe.create_blend_state = nv30_blend_state_create; + nv30->pipe.bind_blend_state = nv30_blend_state_bind; + nv30->pipe.delete_blend_state = nv30_blend_state_delete; + + nv30->pipe.create_sampler_state = nv30_sampler_state_create; + nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; + nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; + nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; + + nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; + nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; + nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; + + nv30->pipe.create_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_create; + 
nv30->pipe.bind_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_bind; + nv30->pipe.delete_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_delete; + + nv30->pipe.create_vs_state = nv30_vp_state_create; + nv30->pipe.bind_vs_state = nv30_vp_state_bind; + nv30->pipe.delete_vs_state = nv30_vp_state_delete; + + nv30->pipe.create_fs_state = nv30_fp_state_create; + nv30->pipe.bind_fs_state = nv30_fp_state_bind; + nv30->pipe.delete_fs_state = nv30_fp_state_delete; + + nv30->pipe.set_blend_color = nv30_set_blend_color; + nv30->pipe.set_clip_state = nv30_set_clip_state; + nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; + nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; + nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; + nv30->pipe.set_scissor_state = nv30_set_scissor_state; + nv30->pipe.set_viewport_state = nv30_set_viewport_state; + + nv30->pipe.set_edgeflags = nv30_set_edgeflags; + nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; + nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h new file mode 100644 index 0000000000..2023278e37 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -0,0 +1,88 @@ +#ifndef __NV30_STATE_H__ +#define __NV30_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv30_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv30_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv30_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv30_vertex_program { + struct pipe_shader_state pipe; + + boolean translated; + + struct nv30_vertex_program_exec *insns; + unsigned nr_insns; + struct nv30_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; + struct nouveau_stateobj *so; +}; + +struct nv30_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv30_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv30_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; + struct nouveau_stateobj *so; +}; + +struct nv30_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct pipe_texture *shadow_tex; + struct pipe_surface *shadow_surface; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c new file mode 100644 index 0000000000..44d43e132a --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -0,0 +1,40 @@ +#include "nv30_context.h" + +static boolean +nv30_state_blend_validate(struct nv30_context *nv30) +{ + so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]); + return TRUE; +} + +struct nv30_state_entry nv30_state_blend = { + .validate = nv30_state_blend_validate, + .dirty = { + .pipe = NV30_NEW_BLEND, + .hw = NV30_STATE_BLEND + } +}; + +static boolean +nv30_state_blend_colour_validate(struct nv30_context *nv30) +{ + 
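+	/* pack the float blend colour into the single ARGB8 word expected
+	 * by NV34TCL_BLEND_COLOR (alpha in the top byte) */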
struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv30->blend_colour; + + so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]); + return TRUE; +} + +struct nv30_state_entry nv30_state_blend_colour = { + .validate = nv30_state_blend_colour_validate, + .dirty = { + .pipe = NV30_NEW_BCOL, + .hw = NV30_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c new file mode 100644 index 0000000000..9480695d6e --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -0,0 +1,118 @@ +#include "nv30_context.h" +#include "nv30_state.h" + +static struct nv30_state_entry *render_states[] = { + &nv30_state_framebuffer, + &nv30_state_rasterizer, + &nv30_state_scissor, + &nv30_state_stipple, + &nv30_state_fragprog, + &nv30_state_fragtex, + &nv30_state_vertprog, + &nv30_state_blend, + &nv30_state_blend_colour, + &nv30_state_zsa, + &nv30_state_viewport, + &nv30_state_vbo, + NULL +}; + +static void +nv30_state_do_validate(struct nv30_context *nv30, + struct nv30_state_entry **states) +{ + const struct pipe_framebuffer_state *fb = &nv30->framebuffer; + unsigned i; + + for (i = 0; i < fb->num_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + while (*states) { + struct nv30_state_entry *e = *states; + + if (nv30->dirty & e->dirty.pipe) { + if (e->validate(nv30)) { + nv30->state.dirty |= (1ULL << e->dirty.hw); + } + } + + states++; + } + nv30->dirty = 0; +} + +void +nv30_state_emit(struct nv30_context *nv30) +{ + struct nv30_state *state = &nv30->state; + struct nv30_screen *screen = nv30->screen; + unsigned i, samplers; + uint64_t states; + + if (nv30->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV30_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv30->pctx_id; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nv30->screen->state[i]); + if (state->hw[i]) + so_emit(nv30->nvws, nv30->screen->state[i]); + states &= ~(1ULL << i); + } + + state->dirty = 0; + + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv30->nvws, + state->hw[NV30_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]); + if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/) + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]); +} + +boolean +nv30_state_validate(struct nv30_context *nv30) +{ +#if 0 + boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE; + + if (nv30->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. 
+ */ + if ((nv30->fallback_swtnl & nv30->dirty) + != nv30->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv30->pipe.flush(&nv30->pipe, 0, NULL); + nv30->dirty |= (NV30_NEW_VIEWPORT | + NV30_NEW_VERTPROG | + NV30_NEW_ARRAYS); + nv30->render_mode = HW; + } +#endif + nv30_state_do_validate(nv30, render_states); +#if 0 + if (nv30->fallback_swtnl || nv30->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); +#endif + return TRUE; +} diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c new file mode 100644 index 0000000000..73bdf7e56c --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -0,0 +1,140 @@ +#include "nv30_context.h" +#include "nouveau/nouveau_util.h" + +static boolean +nv30_state_framebuffer_validate(struct nv30_context *nv30) +{ + struct pipe_framebuffer_state *fb = &nv30->framebuffer; + struct pipe_surface *rt[2], *zeta = NULL; + uint32_t rt_enable, rt_format; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + + rt_enable = 0; + for (i = 0; i < fb->num_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->num_cbufs; i++) + assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */ + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ | + log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/; + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { + uint32_t pitch = rt[0]->stride; + if (zeta) { + pitch |= (zeta->stride << 16); + } else { + pitch |= (pitch << 16); + } + + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); + so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2); + so_data (so, pitch); + so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); + so_reloc (so, rt[1]->buffer, rt[1]->offset, 
rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->stride); + } + + if (zeta_format) { + so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); + so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); + so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + /* TODO: allocate LMA depth buffer */ + } + + so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, nv30->screen->rankine, 0x1d88, 1); + so_data (so, (1 << 12) | h); + /* Wonder why this is needed, context should all be set to zero on init */ + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + so_data (so, 0); + + so_ref(so, &nv30->state.hw[NV30_STATE_FB]); + return TRUE; +} + +struct nv30_state_entry nv30_state_framebuffer = { + .validate = nv30_state_framebuffer_validate, + .dirty = { + .pipe = NV30_NEW_FB, + .hw = NV30_STATE_FB + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c new file mode 100644 index 0000000000..6d1b60e043 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_rasterizer_validate(struct nv30_context *nv30) +{ + so_ref(nv30->rasterizer->so, + &nv30->state.hw[NV30_STATE_RAST]); + return TRUE; +} + +struct nv30_state_entry nv30_state_rasterizer = { + .validate = nv30_state_rasterizer_validate, + .dirty = { + .pipe = NV30_NEW_RAST, + .hw = NV30_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c new file mode 100644 index 0000000000..1db9bc1795 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv30_context.h" + +static boolean +nv30_state_scissor_validate(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; + struct pipe_scissor_state *s = &nv30->scissor; + struct nouveau_stateobj *so; + + if (nv30->state.hw[NV30_STATE_SCISSOR] && + (rast->scissor == 0 && nv30->state.scissor_enabled == 0)) + return FALSE; + nv30->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); + if (nv30->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]); + return TRUE; +} + +struct nv30_state_entry nv30_state_scissor = { + .validate = nv30_state_scissor_validate, + .dirty = { + .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST, + .hw = NV30_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c new file mode 100644 index 0000000000..41b42813b4 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -0,0 +1,39 @@ 
+#include "nv30_context.h" + +static boolean +nv30_state_stipple_validate(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nouveau_stateobj *so; + + if (nv30->state.hw[NV30_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv30->stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]); + return TRUE; +} + +struct nv30_state_entry nv30_state_stipple = { + .validate = nv30_state_stipple_validate, + .dirty = { + .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST, + .hw = NV30_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c new file mode 100644 index 0000000000..951d40ebfd --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -0,0 +1,70 @@ +#include "nv30_context.h" + +static boolean +nv30_state_viewport_validate(struct nv30_context *nv30) +{ + struct pipe_viewport_state *vpt = &nv30->viewport; + struct nouveau_stateobj *so; + unsigned bypass; + + if (/*nv30->render_mode == HW &&*/ !nv30->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv30->state.hw[NV30_STATE_VIEWPORT] && + (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) && + nv30->state.viewport_bypass == bypass) + return FALSE; + nv30->state.viewport_bypass = bypass; + + so = so_new(11, 0); + if (!bypass) { + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); +*/ } else { + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(0.0)); + /* Not entirely certain what this is yet. The DDX uses this + * value also as it fixes rendering when you pass + * pre-transformed vertices to the GPU. My best gusss is that + * this bypasses some culling/clipping stage. Might be worth + * noting that points/lines are uneffected by whatever this + * value fixes, only filled polygons are effected. 
+ */ +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 0x110); +*/ } + /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ + so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); + + so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]); + return TRUE; +} + +struct nv30_state_entry nv30_state_viewport = { + .validate = nv30_state_viewport_validate, + .dirty = { + .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST, + .hw = NV30_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c new file mode 100644 index 0000000000..0940b7269b --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_zsa_validate(struct nv30_context *nv30) +{ + so_ref(nv30->zsa->so, + &nv30->state.hw[NV30_STATE_ZSA]); + return TRUE; +} + +struct nv30_state_entry nv30_state_zsa = { + .validate = nv30_state_zsa_validate, + .dirty = { + .pipe = NV30_NEW_ZSA, + .hw = NV30_STATE_ZSA + } +}; diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c new file mode 100644 index 0000000000..d3376a73bf --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -0,0 +1,77 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv30_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "util/u_tile.h" + +static void +nv30_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_winsys *nvws = nv30->nvws; + + if (do_flip) { + /*XXX: This dodgyness will do for now for correctness. 
But, + * need to investigate whether the 2D engine is able to + * manage a flip (perhaps SIFM?), if not, use the 3D engine + */ + desty += height; + while (height--) { + nvws->surface_copy(nvws, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); + } +} + +static void +nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_winsys *nvws = nv30->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv30_init_surface_functions(struct nv30_context *nv30) +{ + nv30->pipe.surface_copy = nv30_surface_copy; + nv30->pipe.surface_fill = nv30_surface_fill; +} diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c new file mode 100644 index 0000000000..556f981d4a --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -0,0 +1,556 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV34TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, + unsigned ib_size) +{ + struct pipe_screen *pscreen = &nv30->screen->pipe; + unsigned type; + + if (!ib) { + nv30->idxbuf = NULL; + nv30->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv30->idxbuf || + type != nv30->idxbuf_format) { + nv30->dirty |= NV30_NEW_ARRAYS; + nv30->idxbuf = ib; + nv30->idxbuf_format = type; + } + + return TRUE; +} + +static boolean +nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, + int attrib, struct pipe_vertex_element *ve, + 
struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_grobj *rankine = nv30->screen->rankine; + unsigned type, ncomp; + void *map; + + if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV34TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + switch (ncomp) { + case 4: + so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); + break; + case 3: + so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + break; + case 2: + so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + break; + case 1: + so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +boolean +nv30_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_channel *chan = nv30->nvws->channel; + unsigned restart = 0; + + nv30_vbo_set_idxbuf(nv30, NULL, 0); + if (FORCE_SWTNL || !nv30_state_validate(nv30)) { + /*return nv30_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count);*/ + return FALSE; + } + + while (count) { + unsigned vc, nr; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 
2047 : nr; + + nr -= push; + + BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv30->nvws->channel; + + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart = 0; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv30->nvws->channel; + + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart = 0; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv30->nvws->channel; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart = 0; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static boolean +nv30_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map; + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return 
FALSE; + } + + switch (ib_size) { + case 1: + nv30_draw_elements_u08(nv30, map, mode, start, count); + break; + case 2: + nv30_draw_elements_u16(nv30, map, mode, start, count); + break; + case 4: + nv30_draw_elements_u32(nv30, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + ws->buffer_unmap(ws, ib); + return TRUE; +} + +static boolean +nv30_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_channel *chan = nv30->nvws->channel; + unsigned restart = 0; + + while (count) { + unsigned nr, vc; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + return TRUE; +} + +boolean +nv30_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + boolean idxbuf; + + idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv30_state_validate(nv30)) { + /*return nv30_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count);*/ + return FALSE; + } + + if (idxbuf) { + nv30_draw_elements_vbo(pipe, mode, start, count); + } else { + nv30_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static boolean +nv30_vbo_validate(struct nv30_context *nv30) +{ + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct pipe_buffer *ib = nv30->idxbuf; + unsigned ib_format = nv30->idxbuf_format; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + if (nv30->edgeflags) { + /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + return FALSE; + } + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); + + for (hw = 0; hw < nv30->vtxelt_nr; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nv30->vtxelt[hw]; + vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->pitch) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->pitch << 
NV34TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, rankine, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF); + so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT); + so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR); + return FALSE; +} + +struct nv30_state_entry nv30_state_vbo = { + .validate = nv30_vbo_validate, + .dirty = { + .pipe = NV30_NEW_ARRAYS, + .hw = 0, + } +}; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c new file mode 100644 index 0000000000..72824559e8 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -0,0 +1,838 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 2. Arb. swz/negation + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) + +struct nv30_vpc { + struct nv30_vertex_program *vp; + + struct nv30_vertex_program_exec *vpi; + + unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; + + struct nv30_sreg *imm; + unsigned nr_imm; +}; + +static struct nv30_sreg +temp(struct nv30_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp + 1; + return nv30_sr(NV30SR_TEMP, idx); +} + +static struct nv30_sreg +constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv30_vertex_program *vp = vpc->vp; + struct nv30_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv30_sr(NV30SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) +{ + struct nv30_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_TEMP: + sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV30SR_INPUT: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << 
src.index); + hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); + break; + case NV30SR_CONST: + sr |= (NV30_VP_SRC_REG_TYPE_CONST << + NV30_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV30SR_NONE: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + * \u/ + * + */ + + switch (pos) { + case 0: + hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> + NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << + NV30_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> + NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << + NV30_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) +{ + struct nv30_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV30SR_TEMP: + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + break; + case NV30SR_OUTPUT: + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. 
+ */ + hw[3] |= 0x800; + break; + default: + assert(0); + } +} + +static void +nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, + struct nv30_sreg s2) +{ + struct nv30_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + + if (dst.type == NV30SR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv30_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv30_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = nv30_sr(NV30SR_OUTPUT, + vpc->output_map[fdst->DstRegister.Index]); + + break; + case TGSI_FILE_TEMPORARY: + dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); + if (vpc->high_temp < dst.index) + vpc->high_temp = dst.index; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv30_sreg src[3], dst, tmp; + struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = 
&finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + /*XXX: index comparison is broken now that consts come from + * two different register files. + */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], 
none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV30_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv30_vertprog_prepare(struct nv30_vpc *vpc) +{ + struct tgsi_parse_context p; + int nr_imm = 0; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); + assert(vpc->imm); + } + + return TRUE; +} + +static void +nv30_vertprog_translate(struct nv30_context *nv30, + struct nv30_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv30_vpc *vpc = NULL; + + tgsi_dump(vp->pipe.tokens,0); + + vpc = CALLOC(1, sizeof(struct nv30_vpc)); + if (!vpc) + return; + vpc->vp = vp; + vpc->high_temp = -1; + + if (!nv30_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv30_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +// assert(imm->Immediate.Size == 4); + 
vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv30_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + FREE(vpc); +} + +static boolean +nv30_vertprog_validate(struct nv30_context *nv30) +{ + struct nouveau_winsys *nvws = nv30->nvws; + struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + vp = nv30->vertprog; + constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; + + /* Translate TGSI shader into hw bytecode */ + if (!vp->translated) { + nv30_vertprog_translate(nv30, vp); + if (!vp->translated) + return FALSE; + } + + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv30->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv30_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv30->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv30_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. 
+ */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV30_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv30_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) { + ws->buffer_unmap(ws, constbuf); + } + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, vp->insns[i].data[0], vp->insns[i].data[1], + vp->insns[i].data[2], vp->insns[i].data[3]); + } +#endif + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) { + so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv30->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = 0; + so_ref(NULL, &vp->so); +} + +struct nv30_state_entry nv30_state_vertprog = { + .validate = nv30_vertprog_validate, + .dirty = { + .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, + .hw = NV30_STATE_VERTPROG, + } +}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile new file mode 100644 index 0000000000..9c8eadf7e4 --- /dev/null +++ b/src/gallium/drivers/nv40/Makefile @@ -0,0 +1,37 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv40 + +DRIVER_SOURCES = \ + nv40_clear.c \ + nv40_context.c \ + nv40_draw.c \ + nv40_fragprog.c \ + nv40_fragtex.c \ + nv40_miptree.c \ + nv40_query.c \ + nv40_screen.c \ + nv40_state.c \ + nv40_state_blend.c \ + nv40_state_emit.c \ + nv40_state_fb.c \ + nv40_state_rasterizer.c \ + nv40_state_scissor.c \ + nv40_state_stipple.c \ + nv40_state_viewport.c \ + nv40_state_zsa.c \ + nv40_surface.c \ + nv40_vbo.c \ + nv40_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c new file mode 100644 index 0000000000..59efd620e3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c new file mode 100644 index 0000000000..cc63dd734b --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (1); + } + + FIRE_RING(fence); +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (nv40->draw) + draw_destroy(nv40->draw); + FREE(nv40); +} + +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_context *nv40; + struct nouveau_winsys *nvws = screen->nvws; + + nv40 = CALLOC(1, sizeof(struct nv40_context)); + if (!nv40) + return NULL; + nv40->screen = screen; + nv40->pctx_id = pctx_id; + + nv40->nvws = nvws; + + nv40->pipe.winsys = ws; + nv40->pipe.screen = pscreen; + nv40->pipe.destroy = nv40_destroy; + nv40->pipe.draw_arrays = nv40_draw_arrays; + nv40->pipe.draw_elements = nv40_draw_elements; + nv40->pipe.clear = nv40_clear; + nv40->pipe.flush = nv40_flush; + + nv40_init_query_functions(nv40); + nv40_init_surface_functions(nv40); + nv40_init_state_functions(nv40); + + /* Create, configure, and install fallback swtnl path */ + nv40->draw = draw_create(); + draw_wide_point_threshold(nv40->draw, 9999999.0); + draw_wide_line_threshold(nv40->draw, 9999999.0); + draw_enable_line_stipple(nv40->draw, FALSE); + draw_enable_point_sprites(nv40->draw, FALSE); + draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + + return &nv40->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h new file mode 100644 index 0000000000..adcfbdd85a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -0,0 +1,233 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" 
+#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv40_screen *ctx = nv40->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv40_state_index { + NV40_STATE_FB = 0, + NV40_STATE_VIEWPORT = 1, + NV40_STATE_BLEND = 2, + NV40_STATE_RAST = 3, + NV40_STATE_ZSA = 4, + NV40_STATE_BCOL = 5, + NV40_STATE_CLIP = 6, + NV40_STATE_SCISSOR = 7, + NV40_STATE_STIPPLE = 8, + NV40_STATE_FRAGPROG = 9, + NV40_STATE_VERTPROG = 10, + NV40_STATE_FRAGTEX0 = 11, + NV40_STATE_FRAGTEX1 = 12, + NV40_STATE_FRAGTEX2 = 13, + NV40_STATE_FRAGTEX3 = 14, + NV40_STATE_FRAGTEX4 = 15, + NV40_STATE_FRAGTEX5 = 16, + NV40_STATE_FRAGTEX6 = 17, + NV40_STATE_FRAGTEX7 = 18, + NV40_STATE_FRAGTEX8 = 19, + NV40_STATE_FRAGTEX9 = 20, + NV40_STATE_FRAGTEX10 = 21, + NV40_STATE_FRAGTEX11 = 22, + NV40_STATE_FRAGTEX12 = 23, + NV40_STATE_FRAGTEX13 = 24, + NV40_STATE_FRAGTEX14 = 25, + NV40_STATE_FRAGTEX15 = 26, + NV40_STATE_VERTTEX0 = 27, + NV40_STATE_VERTTEX1 = 28, + NV40_STATE_VERTTEX2 = 29, + NV40_STATE_VERTTEX3 = 30, + NV40_STATE_VTXBUF = 31, + NV40_STATE_VTXFMT = 32, + NV40_STATE_VTXATTR = 33, + NV40_STATE_MAX = 34 +}; + +#include "nv40_screen.h" + +#define NV40_NEW_BLEND (1 << 0) +#define NV40_NEW_RAST (1 << 1) +#define NV40_NEW_ZSA (1 << 2) +#define NV40_NEW_SAMPLER (1 << 3) +#define NV40_NEW_FB (1 << 4) +#define NV40_NEW_STIPPLE (1 << 5) +#define NV40_NEW_SCISSOR (1 << 6) +#define NV40_NEW_VIEWPORT (1 << 7) +#define NV40_NEW_BCOL (1 << 8) +#define NV40_NEW_VERTPROG (1 << 9) +#define NV40_NEW_FRAGPROG (1 << 10) +#define NV40_NEW_ARRAYS (1 << 11) +#define NV40_NEW_UCP (1 << 12) + +struct nv40_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nv40_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned viewport_bypass; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NV40_STATE_MAX]; +}; + +struct nv40_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv40_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nv40_state state; + struct { + struct nv40_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; + + /* Context state */ + unsigned dirty, draw_dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nv40_vertex_program *vertprog; + struct nv40_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nv40_rasterizer_state *rasterizer; + struct nv40_zsa_state *zsa; + struct nv40_blend_state *blend; + struct pipe_blend_color blend_colour; + struct 
pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + const unsigned *edgeflags; +}; + +static INLINE struct nv40_context * +nv40_context(struct pipe_context *pipe) +{ + return (struct nv40_context *)pipe; +} + +struct nv40_state_entry { + boolean (*validate)(struct nv40_context *nv40); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern void nv40_init_state_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_query_functions(struct nv40_context *nv40); + +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); +extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_destroy(struct nv40_context *, + struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_destroy(struct nv40_context *, + struct nv40_fragment_program *); + +/* nv40_fragtex.c */ +extern void nv40_fragtex_bind(struct nv40_context *); + +/* nv40_state.c and friends */ +extern boolean nv40_state_validate(struct nv40_context *nv40); +extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); +extern void nv40_state_emit(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_rasterizer; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; +extern struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; +extern struct nv40_state_entry nv40_state_vtxfmt; + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c new file mode 100644 index 0000000000..8e56cdc2fe --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -0,0 +1,349 @@ +#include "pipe/p_shader_tokens.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nv40_context.h" +#define NV40_SHADER_NO_FUCKEDNESS +#include "nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. 
Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nv40_render_stage { + struct draw_stage stage; + struct nv40_context *nv40; + unsigned prim; +}; + +static INLINE struct nv40_render_stage * +nv40_render_stage(struct draw_stage *stage) +{ + return (struct nv40_render_stage *)stage; +} + +static INLINE void +nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) +{ + unsigned i; + + for (i = 0; i < nv40->swtnl.nr_attribs; i++) { + unsigned idx = nv40->swtnl.draw[i]; + unsigned hw = nv40->swtnl.hw[i]; + + switch (nv40->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + OUT_RING (fui(v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nv40_render_stage *rs = nv40_render_stage(stage); + struct nv40_context *nv40 = rs->nv40; + struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (pb->remaining < ((count * 20) + 6)) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(NULL); + nv40_state_emit(nv40); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nv40_render_vertex(nv40, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. 
+ */ + if (pb->remaining < ((count * 20) + 6)) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + +static void +nv40_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); +} + +static void +nv40_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); +} + +static void +nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); +} + +static void +nv40_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nv40_render_stage *rs = nv40_render_stage(draw); + struct nv40_context *nv40 = rs->nv40; + + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + +static void +nv40_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nv40_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static INLINE void +emit_mov(struct nv40_vertex_program *vp, + unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ + struct nv40_vertex_program_exec *inst; + + vp->insns = realloc(vp->insns, + sizeof(struct nv40_vertex_program_exec) * + ++vp->nr_insns); + inst = &vp->insns[vp->nr_insns - 1]; + + inst->data[0] = 0x401f9c6c; + inst->data[1] = 0x0040000d | (src << 8); + inst->data[2] = 0x8106c083; + inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); + inst->const_index = -1; + inst->has_branch_offset = FALSE; + + vp->ir |= (1 << src); + if (vor != ~0) + vp->or |= (1 << vor); +} + +static struct nv40_vertex_program * +create_drawvp(struct nv40_context *nv40) +{ + struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); + unsigned i; + + emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); + for (i = 0; i < 8; i++) + emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); + + vp->insns[vp->nr_insns - 1].data[3] |= 1; + vp->translated = TRUE; + return vp; +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ + struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); + + if (!nv40->swtnl.vertprog) + nv40->swtnl.vertprog = create_drawvp(nv40); + + render->nv40 = nv40; + render->stage.draw = nv40->draw; + render->stage.point = nv40_render_point; + render->stage.line = nv40_render_line; + render->stage.tri = nv40_render_tri; + render->stage.flush = nv40_render_flush; + render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; + render->stage.destroy = nv40_render_destroy; + + return &render->stage; +} + +boolean +nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + unsigned i; + void *map; + + if (!nv40_state_validate_swtnl(nv40)) + return FALSE; + nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); + nv40_state_emit(nv40); + + for (i = 0; i < nv40->vtxbuf_nr; i++) { + map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + 
draw_set_mapped_vertex_buffer(nv40->draw, i, map); + } + + if (idxbuf) { + map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nv40->draw, 0, NULL); + } + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nv40->draw, map, nr); + } + + draw_arrays(nv40->draw, mode, start, count); + + for (i = 0; i < nv40->vtxbuf_nr; i++) + ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + + if (idxbuf) + ws->buffer_unmap(ws, idxbuf); + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) + ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nv40->draw); + pipe->flush(pipe, 0, NULL); + + return TRUE; +} + +static INLINE void +emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned a = nv40->swtnl.nr_attribs++; + + nv40->swtnl.hw[a] = hw; + nv40->swtnl.emit[a] = emit; + nv40->swtnl.draw[a] = draw_out; +} + +static boolean +nv40_state_vtxfmt_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nv40->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nv40_state_entry nv40_state_vtxfmt = { + .validate = nv40_state_vtxfmt_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c new file mode 100644 index 0000000000..91dcbebda0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -0,0 +1,991 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv40_fpc { + 
struct nv40_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_temp; + + int num_regs; + + unsigned inst_offset; + unsigned have_const; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nv40_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv40_sreg +temp(struct nv40_fpc *fpc) +{ + int idx = ffs(~fpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + fpc->r_temps |= (1 << idx); + fpc->r_temps_discard |= (1 << idx); + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_fpc *fpc) +{ + fpc->r_temps &= ~fpc->r_temps_discard; + fpc->r_temps_discard = 0; +} + +static INLINE struct nv40_sreg +constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv40_fpc *fpc, int size) +{ + struct nv40_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_INPUT: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + break; + case NV40SR_OUTPUT: + sr |= NV40_FP_REG_SRC_HALF; + /* fall-through */ + case NV40SR_TEMP: + sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + break; + case NV40SR_CONST: + if (!fpc->have_const) { + grow_insns(fpc, 4); + fpc->have_const = 1; + } + + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nv40_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + break; + case NV40SR_NONE: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV40SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs 
= dst.index + 1; + break; + case NV40SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NV40_FP_OP_OUT_REG_HALF; + } + break; + case NV40SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + fpc->have_const = 0; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV40_FP_OP_OPCODE_KIL) + fp->fp_control |= NV40TCL_FP_CONTROL_KIL; + hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV40_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + + nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = fpc->r_temp[fsrc->SrcRegister.Index]; + break; + /* NV40 fragprog result regs are just temps, so this is simple */ + case TGSI_FILE_OUTPUT: + src = fpc->r_result[fsrc->SrcRegister.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + return fpc->r_result[fdst->DstRegister.Index]; + case TGSI_FILE_TEMPORARY: + return fpc->r_temp[fdst->DstRegister.Index]; + case TGSI_FILE_NULL: + return nv40_sr(NV40SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv40_sr(NV40SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= 
MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg src[3], dst, tmp; + int mask, sat, unit; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = 
tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DDX: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDX, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DDY: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDY, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KILP: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KIL: + dst = nv40_sr(NV40SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; + 
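+	/* POW, RSQ and SCS have no native fragprog opcode; the cases below
+	 * open-code them (e.g. POW as EX2(LG2(x) * y)), as noted in the
+	 * non-native instruction list in nv40_shader.h.
+	 */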
case TGSI_OPCODE_POW: + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + break; + case TGSI_OPCODE_RSQ: + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SEQ: + arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SLE: + arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SNE: + arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(fpc, sat, STR, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(fpc); + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
+ SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->DeclarationRange.First; + unsigned hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = 1; + break; + case TGSI_SEMANTIC_COLOR: + switch (fdec->Semantic.SemanticIndex) { + case 0: hw = 0; break; + case 1: hw = 2; break; + case 2: hw = 3; break; + case 3: hw = 4; break; + default: + NOUVEAU_ERR("bad rcol index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_temps |= (1 << hw); + return TRUE; +} + +static boolean +nv40_fragprog_prepare(struct nv40_fpc *fpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, i; + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + } + + return TRUE; + +out_err: + if (fpc->r_temp) + FREE(fpc->r_temp); + tgsi_parse_free(&p); + return FALSE; +} + +static void +nv40_fragprog_translate(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv40_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv40_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->num_regs = 2; + + if (!nv40_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv40_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not 
be necessary. */ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (fpc->r_temp) + FREE(fpc->r_temp); + FREE(fpc); +} + +static void +nv40_fragprog_upload(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + struct pipe_buffer *constbuf = + nv40->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; + int i; + + if (fp->translated) + goto update_constants; + + nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; + nv40_fragprog_translate(nv40, fp); + if (!fp->translated) { + nv40->fallback_swrast |= NV40_NEW_FRAGPROG; + return FALSE; + } + + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv40_fragprog_upload(nv40, fp); + + so = so_new(4, 1); + so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); + so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + so_ref(so, &fp->so); + +update_constants: + if (fp->nr_consts) { + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv40_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + ws->buffer_unmap(ws, constbuf); + + if (new_consts) + nv40_fragprog_upload(nv40, fp); + } + + if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { + so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv40_fragprog_destroy(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + if (fp->insn_len) + FREE(fp->insn); +} + +struct nv40_state_entry nv40_state_fragprog = { + .validate = nv40_fragprog_validate, + .dirty = { + .pipe = NV40_NEW_FRAGPROG, + .hw = NV40_STATE_FRAGPROG + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c new file mode 100644 index 0000000000..0227d22620 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -0,0 +1,168 @@ +#include "nv40_context.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV40TCL_TEX_FORMAT_FORMAT_##tf, \ 
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ + NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ + NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ + NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ + ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \ + (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \ +} + +struct nv40_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; + int sign; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), + _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), + _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + {}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ + struct nv40_texture_format *tf = nv40_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return NULL; +} + + +static struct nouveau_stateobj * +nv40_fragtex_build(struct nv40_context *nv40, int unit) +{ + struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; + struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; + struct pipe_texture *pt = &nv40mt->base; + struct nv40_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs, txp; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv40_fragtex_format(pt->format); + if (!tf) + assert(0); + + txf = ps->fmt; + txf |= tf->format | 0x8000; + txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + + if (1) /* XXX */ + txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV40TCL_TEX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV40TCL_TEX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV40TCL_TEX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV40TCL_TEX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + txp = 0; + } else { + txp = nv40mt->level[0].pitch; + txf |= NV40TCL_TEX_FORMAT_LINEAR; + } + + txs = tf->swizzle; + + so = so_new(16, 2); + so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); + so_data (so, 
txs); + so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + + return so; +} + +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + struct nv40_state *state = &nv40->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(2, 0); + so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); + } + + samplers = nv40->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = nv40_fragtex_build(nv40, unit); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); + } + + nv40->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nv40_state_entry nv40_state_fragtex = { + .validate = nv40_fragtex_validate, + .dirty = { + .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c new file mode 100644 index 0000000000..00ce6be985 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -0,0 +1,198 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv40_context.h" + +static void +nv40_miptree_layout(struct nv40_miptree *mt) +{ + struct pipe_texture *pt = &mt->base; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f, pitch; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + pitch = pt->width[0]; + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) + pitch = pt->nblocksx[l]; + pitch = align(pitch, 64); + + mt->level[l].pitch = pitch * pt->block.size; + mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + mt->level[l].image_offset[f] = offset; + offset += mt->level[l].pitch * pt->height[l]; + } + } + + mt->total_size = offset; +} + +static struct pipe_texture * +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_miptree *mt; + unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE; + + mt = MALLOC(sizeof(struct nv40_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->shadow_tex = NULL; + mt->shadow_surface = NULL; + + /* Swizzled textures must be POT */ + if (pt->width[0] & (pt->width[0] - 1) || + pt->height[0] & (pt->height[0] - 1)) + 
mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R16_SNORM: + break; + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + nv40_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + struct nv40_miptree *mt = (struct nv40_miptree *)pt; + int l; + + *ppt = NULL; + if (--pt->refcount) + return; + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + if (mt->shadow_tex) { + assert(mt->shadow_surface); + pscreen->tex_surface_release(pscreen, &mt->shadow_surface); + nv40_miptree_release(pscreen, &mt->shadow_tex); + } + + FREE(mt); +} + +static struct pipe_surface * +nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->winsys = pscreen->winsys; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = mt->level[level].image_offset[zslice]; + } else { + ps->offset = mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv40_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(pscreen, &ps->buffer, NULL); + FREE(ps); +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv40_miptree_create; + pscreen->texture_release = nv40_miptree_release; + pscreen->get_tex_surface = nv40_miptree_surface_new; + pscreen->tex_surface_release = nv40_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c new file mode 100644 index 0000000000..9b9a43f49d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" + +struct nv40_query { + struct nouveau_resource *object; + unsigned type; + 
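+	/* 'ready' is set once the query notifier has signalled and
+	 * 'result' holds the final value; begin_query() clears it again.
+	 */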
boolean ready; + uint64_t result; +}; + +static INLINE struct nv40_query * +nv40_query(struct pipe_query *pipe) +{ + return (struct nv40_query *)pipe; +} + +static struct pipe_query * +nv40_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv40_query *q; + + q = CALLOC(1, sizeof(struct nv40_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + if (q->object) + nv40->nvws->res_free(&q->object); + FREE(q); +} + +static void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); + + BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(NULL); +} + +static boolean +nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + struct nouveau_winsys *nvws = nv40->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv40->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv40->screen->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = nvws->notifier_retval(nv40->screen->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv40_init_query_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_query = nv40_query_create; + nv40->pipe.destroy_query = nv40_query_destroy; + nv40->pipe.begin_query = nv40_query_begin; + nv40->pipe.end_query = nv40_query_end; + nv40->pipe.get_query_result = nv40_query_result; +} diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 0000000000..9657a19c50 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,368 @@ +#include "pipe/p_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_device *dev = 
screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; /* We have 4 - but unsupported currently */ + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + if (screen->curie->grclass == NV40TCL) + return 1; + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_map; + void *map; + + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + + if (!mt->shadow_tex) { + unsigned old_tex_usage = surface->texture->tex_usage; + surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; + mt->shadow_tex = screen->texture_create(screen, surface->texture); + surface->texture->tex_usage = old_tex_usage; + + assert(mt->shadow_tex->tex_usage & 
NOUVEAU_TEXTURE_USAGE_LINEAR); + mt->shadow_surface = screen->get_tex_surface + ( + screen, mt->shadow_tex, + surface->face, surface->level, surface->zslice, + surface->usage + ); + } + + surface_to_map = mt->shadow_surface; + } + else + surface_to_map = surface; + + assert(surface_to_map); + + map = ws->buffer_map(ws, surface_to_map->buffer, flags); + if (!map) + return NULL; + + return map + surface_to_map->offset; +} + +static void +nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_unmap; + + /* TODO: Copy from shadow just before push buffer is flushed instead. + There are probably some programs that map/unmap excessively + before rendering. */ + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + + assert(mt->shadow_tex); + + surface_to_unmap = mt->shadow_surface; + } + else + surface_to_unmap = surface; + + assert(surface_to_unmap); + + ws->buffer_unmap(ws, surface_to_unmap->buffer); + + if (surface_to_unmap != surface) { + struct nv40_screen *nvscreen = nv40_screen(screen); + + nvscreen->nvws->surface_copy(nvscreen->nvws, + surface, 0, 0, + surface_to_unmap, 0, 0, + surface->width, surface->height); + } +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->curie); + + FREE(pscreen); +} + +struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); + struct nouveau_stateobj *so; + unsigned curie_class; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV40TCL; + else + if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + default: + break; + } + + if (!curie_class) { + NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 512) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static curie 
initialisation */ + so = so_new(128, 0); + so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + + so_method(so, screen->curie, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->curie, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->curie, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->curie, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->curie, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->curie, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->curie, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->curie, 0x1e94, 1); + so_data (so, 0x00000001); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv40_screen_destroy; + + screen->pipe.get_name = nv40_screen_get_name; + screen->pipe.get_vendor = nv40_screen_get_vendor; + screen->pipe.get_param = nv40_screen_get_param; + screen->pipe.get_paramf = nv40_screen_get_paramf; + + screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + + screen->pipe.surface_map = nv40_surface_map; + screen->pipe.surface_unmap = nv40_surface_unmap; + + nv40_screen_init_miptree_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 0000000000..c04a1275a0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,35 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv40_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + /* HW graphics objects */ + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV40_STATE_MAX]; +}; + +static INLINE struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ + return (struct nv40_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h new file mode 100644 index 0000000000..854dccf548 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -0,0 +1,556 @@ 
+#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +# define NV40_VP_INST_COND_FL 0 +# define NV40_VP_INST_COND_LT 1 +# define NV40_VP_INST_COND_EQ 2 +# define NV40_VP_INST_COND_LE 3 +# define NV40_VP_INST_COND_GT 4 +# define NV40_VP_INST_COND_NE 5 +# define NV40_VP_INST_COND_GE 6 +# define NV40_VP_INST_COND_TR 7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_MUL 0x02 +# define NV40_VP_INST_OP_ADD 0x03 +# define NV40_VP_INST_OP_MAD 0x04 +# define NV40_VP_INST_OP_DP3 0x05 +# define NV40_VP_INST_OP_DPH 0x06 +# define NV40_VP_INST_OP_DP4 
0x07 +# define NV40_VP_INST_OP_DST 0x08 +# define NV40_VP_INST_OP_MIN 0x09 +# define NV40_VP_INST_OP_MAX 0x0A +# define NV40_VP_INST_OP_SLT 0x0B +# define NV40_VP_INST_OP_SGE 0x0C +# define NV40_VP_INST_OP_ARL 0x0D +# define NV40_VP_INST_OP_FRC 0x0E +# define NV40_VP_INST_OP_FLR 0x0F +# define NV40_VP_INST_OP_SEQ 0x10 +# define NV40_VP_INST_OP_SFL 0x11 +# define NV40_VP_INST_OP_SGT 0x12 +# define NV40_VP_INST_OP_SLE 0x13 +# define NV40_VP_INST_OP_SNE 0x14 +# define NV40_VP_INST_OP_STR 0x15 +# define NV40_VP_INST_OP_SSG 0x16 +# define NV40_VP_INST_OP_ARR 0x17 +# define NV40_VP_INST_OP_ARA 0x18 +# define NV40_VP_INST_OP_TXL 0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_RCP 0x02 +# define NV40_VP_INST_OP_RCC 0x03 +# define NV40_VP_INST_OP_RSQ 0x04 +# define NV40_VP_INST_OP_EXP 0x05 +# define NV40_VP_INST_OP_LOG 0x06 +# define NV40_VP_INST_OP_LIT 0x07 +# define NV40_VP_INST_OP_BRA 0x09 +# define NV40_VP_INST_OP_CAL 0x0B +# define NV40_VP_INST_OP_RET 0x0C +# define NV40_VP_INST_OP_LG2 0x0D +# define NV40_VP_INST_OP_EX2 0x0E +# define NV40_VP_INST_OP_SIN 0x0F +# define NV40_VP_INST_OP_COS 0x10 +# define NV40_VP_INST_OP_PUSHA 0x13 +# define NV40_VP_INST_OP_POPA 0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +# define NV40_VP_INST_IN_POS 0 +# define NV40_VP_INST_IN_WEIGHT 1 +# define NV40_VP_INST_IN_NORMAL 2 +# define NV40_VP_INST_IN_COL0 3 +# define NV40_VP_INST_IN_COL1 4 +# define NV40_VP_INST_IN_FOGC 5 +# define NV40_VP_INST_IN_TC0 8 +# define NV40_VP_INST_IN_TC(n) (8+n) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define 
NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST_LAST (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. + * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. 
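+ *
+ * Encoding example
+ * As a rough, illustrative sketch (mirroring what nv40_fp_arith() and
+ * emit_src() in nv40_fragprog.c emit, rather than anything taken from
+ * documentation), a saturated "MOV R0, fragment.color" packs its
+ * opcode dword as:
+ *
+ *   (NV40_FP_OP_OPCODE_MOV << NV40_FP_OP_OPCODE_SHIFT) |
+ *   (0xf << NV40_FP_OP_OUTMASK_SHIFT) |
+ *   (NV40_FP_OP_INPUT_SRC_COL0 << NV40_FP_OP_INPUT_SRC_SHIFT) |
+ *   NV40_FP_OP_OUT_SAT
+ *
+ * with the first source-selection dword carrying NV40_FP_REG_TYPE_INPUT
+ * and an identity swizzle in the NV40_FP_REG_SWZ_* fields.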
+ * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * DP2 - MUL + ADD + * NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT 1 +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NV40_FP_OP_OUT_REG_HALF (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT 9 +#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV40_FP_OP_OUT_X (1 << 9) +# define NV40_FP_OP_OUT_Y (1 <<10) +# define NV40_FP_OP_OUT_Z (1 <<11) +# define NV40_FP_OP_OUT_W (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV40_FP_OP_INPUT_SRC_SHIFT 13 +#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV40_FP_OP_INPUT_SRC_COL0 0x1 +# define NV40_FP_OP_INPUT_SRC_COL1 0x2 +# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV40_FP_OP_INPUT_SRC_TC0 0x4 +# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT 17 +#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT 22 +#define NV40_FP_OP_PRECISION_MASK (3 << 22) +# define NV40_FP_PRECISION_FP32 0 +# define NV40_FP_PRECISION_FP16 1 +# define NV40_FP_PRECISION_FX12 2 +#define NV40_FP_OP_OPCODE_SHIFT 24 +#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV40_FP_OP_OPCODE_NOP 0x00 +# define NV40_FP_OP_OPCODE_MOV 0x01 +# define NV40_FP_OP_OPCODE_MUL 0x02 +# define NV40_FP_OP_OPCODE_ADD 0x03 +# define NV40_FP_OP_OPCODE_MAD 0x04 +# define NV40_FP_OP_OPCODE_DP3 0x05 +# define NV40_FP_OP_OPCODE_DP4 0x06 +# define NV40_FP_OP_OPCODE_DST 0x07 +# define NV40_FP_OP_OPCODE_MIN 0x08 +# define NV40_FP_OP_OPCODE_MAX 0x09 +# define NV40_FP_OP_OPCODE_SLT 0x0A +# define NV40_FP_OP_OPCODE_SGE 0x0B +# define NV40_FP_OP_OPCODE_SLE 0x0C +# define NV40_FP_OP_OPCODE_SGT 0x0D +# define NV40_FP_OP_OPCODE_SNE 0x0E +# define NV40_FP_OP_OPCODE_SEQ 0x0F +# define NV40_FP_OP_OPCODE_FRC 0x10 +# define NV40_FP_OP_OPCODE_FLR 0x11 +# define NV40_FP_OP_OPCODE_KIL 0x12 +# define NV40_FP_OP_OPCODE_PK4B 0x13 +# define NV40_FP_OP_OPCODE_UP4B 0x14 +/* DDX/DDY can only write to XY */ +# define NV40_FP_OP_OPCODE_DDX 0x15 +# define NV40_FP_OP_OPCODE_DDY 0x16 +# define NV40_FP_OP_OPCODE_TEX 0x17 +# define NV40_FP_OP_OPCODE_TXP 0x18 +# define NV40_FP_OP_OPCODE_TXD 0x19 +# define NV40_FP_OP_OPCODE_RCP 0x1A +# define NV40_FP_OP_OPCODE_EX2 0x1C +# define NV40_FP_OP_OPCODE_LG2 0x1D +# define NV40_FP_OP_OPCODE_STR 0x20 +# define NV40_FP_OP_OPCODE_SFL 0x21 +# define NV40_FP_OP_OPCODE_COS 0x22 +# define NV40_FP_OP_OPCODE_SIN 0x23 +# define NV40_FP_OP_OPCODE_PK2H 0x24 +# define NV40_FP_OP_OPCODE_UP2H 0x25 +# define NV40_FP_OP_OPCODE_PK4UB 0x27 +# define NV40_FP_OP_OPCODE_UP4UB 0x28 +# define NV40_FP_OP_OPCODE_PK2US 0x29 +# define NV40_FP_OP_OPCODE_UP2US 0x2A +# define NV40_FP_OP_OPCODE_DP2A 0x2E +# define NV40_FP_OP_OPCODE_TXL 0x2F +# define NV40_FP_OP_OPCODE_TXB 0x31 +# define NV40_FP_OP_OPCODE_DIV 0x3A +# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +# define NV40_FP_OP_BRA_OPCODE_IF 0x2 +# define 
NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +# define NV40_FP_OP_BRA_OPCODE_REP 0x4 +# define NV40_FP_OP_BRA_OPCODE_RET 0x5 +#define NV40_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT 18 +#define NV40_FP_OP_COND_MASK (0x07 << 18) +# define NV40_FP_OP_COND_FL 0 +# define NV40_FP_OP_COND_LT 1 +# define NV40_FP_OP_COND_EQ 2 +# define NV40_FP_OP_COND_LE 3 +# define NV40_FP_OP_COND_GT 4 +# define NV40_FP_OP_COND_NE 5 +# define NV40_FP_OP_COND_GE 6 +# define NV40_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NV40_FP_OP_DST_SCALE_SHIFT 28 +#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV40_FP_OP_DST_SCALE_1X 0 +#define NV40_FP_OP_DST_SCALE_2X 1 +#define NV40_FP_OP_DST_SCALE_4X 2 +#define NV40_FP_OP_DST_SCALE_8X 3 +#define NV40_FP_OP_DST_SCALE_INV_2X 5 +#define NV40_FP_OP_DST_SCALE_INV_4X 6 +#define NV40_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. 
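+ * If that observation holds, an encoder would presumably just mirror the
+ * one iteration count into all three fields of the SRC1 dword (dword 2),
+ * roughly (sketch only, names illustrative):
+ *   insn[2] |= (count << NV40_FP_OP_REP_COUNT1_SHIFT) |
+ *              (count << NV40_FP_OP_REP_COUNT2_SHIFT) |
+ *              (count << NV40_FP_OP_REP_COUNT3_SHIFT);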
+ */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT 0 +#define NV40_FP_REG_TYPE_MASK (3 << 0) +# define NV40_FP_REG_TYPE_TEMP 0 +# define NV40_FP_REG_TYPE_INPUT 1 +# define NV40_FP_REG_TYPE_CONST 2 +#define NV40_FP_REG_SRC_SHIFT 2 +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NV40_FP_REG_SRC_HALF (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT 9 +#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT 9 +#define NV40_FP_REG_SWZ_X_MASK (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT 11 +#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT 13 +#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT 15 +#define NV40_FP_REG_SWZ_W_MASK (3 << 15) +# define NV40_FP_SWIZZLE_X 0 +# define NV40_FP_SWIZZLE_Y 1 +# define NV40_FP_SWIZZLE_Z 2 +# define NV40_FP_SWIZZLE_W 3 +#define NV40_FP_REG_NEGATE (1 << 17) + +#ifndef NV40_SHADER_NO_FUCKEDNESS +#define NV40SR_NONE 0 +#define NV40SR_OUTPUT 1 +#define NV40SR_INPUT 2 +#define NV40SR_TEMP 3 +#define NV40SR_CONST 4 + +struct nv40_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv40_sreg +nv40_sr(int type, int index) +{ + struct nv40_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ + struct nv40_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} +#endif + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c new file mode 100644 index 0000000000..255c4b294d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -0,0 +1,740 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +static void * +nv40_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); + 
so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | + nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + if (cso->logicop_enable) { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->blend = hwcso; + nv40->dirty |= NV40_NEW_BLEND; +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV40TCL_TEX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + } + + return ret >> NV40TCL_TEX_WRAP_S_SHIFT; +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv40_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv40_sampler_state)); + + ps->fmt = 0; + if (!cso->normalized_coords) + ps->fmt |= NV40TCL_TEX_FORMAT_RECT; + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy >= 2.0) { + /* no idea, binary driver sets it, works without it.. meh.. 
*/ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; + break; + } + break; + } + + ps->filt = filter; + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } + + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + } + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv40->tex_sampler[unit] = sampler[unit]; + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_samplers; unit++) { + nv40->tex_sampler[unit] = NULL; + nv40->dirty_samplers |= (1 << unit); + } + + nv40->nr_samplers = nr; + nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void 
+nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], miptree[unit]); + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], NULL); + nv40->dirty_samplers |= (1 << unit); + } + + nv40->nr_textures = nr; + nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, curie, NV40TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : + NV40TCL_SHADE_MODEL_SMOOTH); + + so_method(so, curie, NV40TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, curie, NV40TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 
1 : 0); + + so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, curie, NV40TCL_POINT_SPRITE, 1); + if (cso->point_sprite) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->rasterizer = hwcso; + nv40->dirty |= NV40_NEW_RAST; + nv40->draw_dirty |= NV40_NEW_RAST; +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; + + so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref)); + + if (cso->stencil[0].enabled) { + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 
1 : 0); + so_data (so, cso->stencil[1].write_mask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].value_mask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->zsa = hwcso; + nv40->dirty |= NV40_NEW_ZSA; +} + +static void +nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv40_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->vertprog = hwcso; + nv40->dirty |= NV40_NEW_VERTPROG; + nv40->draw_dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nv40->draw, vp->draw); + nv40_vertprog_destroy(nv40, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv40_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->fragprog = hwcso; + nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_fragment_program *fp = hwcso; + + nv40_fragprog_destroy(nv40, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->blend_colour = *bcol; + nv40->dirty |= NV40_NEW_BCOL; +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->clip = *clip; + nv40->dirty |= NV40_NEW_UCP; + nv40->draw_dirty |= NV40_NEW_UCP; +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->constbuf[shader] = buf->buffer; + nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nv40->dirty |= NV40_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv40->dirty |= 
NV40_NEW_FRAGPROG; + } +} + +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->framebuffer = *fb; + nv40->dirty |= NV40_NEW_FB; +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->stipple, stipple->stipple, 4 * 32); + nv40->dirty |= NV40_NEW_STIPPLE; +} + +static void +nv40_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->scissor = *s; + nv40->dirty |= NV40_NEW_SCISSOR; +} + +static void +nv40_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->viewport = *vpt; + nv40->dirty |= NV40_NEW_VIEWPORT; + nv40->draw_dirty |= NV40_NEW_VIEWPORT; +} + +static void +nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); + nv40->vtxbuf_nr = count; + + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); + nv40->vtxelt_nr = count; + + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->edgeflags = bitfield; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +void +nv40_init_state_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_blend_state = nv40_blend_state_create; + nv40->pipe.bind_blend_state = nv40_blend_state_bind; + nv40->pipe.delete_blend_state = nv40_blend_state_delete; + + nv40->pipe.create_sampler_state = nv40_sampler_state_create; + nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; + nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; + nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + + nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; + nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; + nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + + nv40->pipe.create_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_create; + nv40->pipe.bind_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_bind; + nv40->pipe.delete_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_delete; + + nv40->pipe.create_vs_state = nv40_vp_state_create; + nv40->pipe.bind_vs_state = nv40_vp_state_bind; + nv40->pipe.delete_vs_state = nv40_vp_state_delete; + + nv40->pipe.create_fs_state = nv40_fp_state_create; + nv40->pipe.bind_fs_state = nv40_fp_state_bind; + nv40->pipe.delete_fs_state = nv40_fp_state_delete; + + nv40->pipe.set_blend_color = nv40_set_blend_color; + nv40->pipe.set_clip_state = nv40_set_clip_state; + nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; + nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; + nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; + nv40->pipe.set_scissor_state = nv40_set_scissor_state; + 
nv40->pipe.set_viewport_state = nv40_set_viewport_state; + + nv40->pipe.set_edgeflags = nv40_set_edgeflags; + nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; + nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h new file mode 100644 index 0000000000..9c55903ae3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -0,0 +1,91 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv40_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv40_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv40_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv40_vertex_program { + struct pipe_shader_state pipe; + + struct draw_vertex_shader *draw; + + boolean translated; + + struct pipe_clip_state ucp; + + struct nv40_vertex_program_exec *insns; + unsigned nr_insns; + struct nv40_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; + uint32_t clip_ctrl; + struct nouveau_stateobj *so; +}; + +struct nv40_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv40_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv40_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + struct nouveau_stateobj *so; +}; + +struct nv40_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct pipe_texture *shadow_tex; + struct pipe_surface *shadow_surface; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 0000000000..95e6d7394f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ + so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { + .validate = nv40_state_blend_validate, + .dirty = { + .pipe = NV40_NEW_BLEND, + .hw = NV40_STATE_BLEND + } +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv40->blend_colour; + + so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { + .validate = nv40_state_blend_colour_validate, + .dirty = { + .pipe = NV40_NEW_BCOL, + .hw = NV40_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c new file mode 100644 index 0000000000..52ec4c044b --- /dev/null +++ 
b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -0,0 +1,184 @@ +#include "nv40_context.h" +#include "nv40_state.h" +#include "draw/draw_context.h" + +static struct nv40_state_entry *render_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vbo, + NULL +}; + +static struct nv40_state_entry *swtnl_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vtxfmt, + NULL +}; + +static void +nv40_state_do_validate(struct nv40_context *nv40, + struct nv40_state_entry **states) +{ + const struct pipe_framebuffer_state *fb = &nv40->framebuffer; + unsigned i; + + for (i = 0; i < fb->num_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + while (*states) { + struct nv40_state_entry *e = *states; + + if (nv40->dirty & e->dirty.pipe) { + if (e->validate(nv40)) + nv40->state.dirty |= (1ULL << e->dirty.hw); + } + + states++; + } + nv40->dirty = 0; +} + +void +nv40_state_emit(struct nv40_context *nv40) +{ + struct nv40_state *state = &nv40->state; + struct nv40_screen *screen = nv40->screen; + unsigned i, samplers; + uint64_t states; + + if (nv40->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV40_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv40->pctx_id; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nv40->screen->state[i]); + if (state->hw[i]) + so_emit(nv40->nvws, nv40->screen->state[i]); + states &= ~(1ULL << i); + } + + if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | + (1ULL << NV40_STATE_FRAGTEX0))) { + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); + } + + state->dirty = 0; + + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv40->nvws, + state->hw[NV40_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); + if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); +} + +boolean +nv40_state_validate(struct nv40_context *nv40) +{ + boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; + + if (nv40->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. 
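+ * For example, if the fallback was triggered by NV40_NEW_ARRAYS and
+ * only NV40_NEW_BLEND has been dirtied since, the check below keeps
+ * rendering on the swtnl path instead of revalidating for hardware.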
+ */ + if ((nv40->fallback_swtnl & nv40->dirty) + != nv40->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv40->pipe.flush(&nv40->pipe, 0, NULL); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS); + nv40->render_mode = HW; + } + + nv40_state_do_validate(nv40, render_states); + if (nv40->fallback_swtnl || nv40->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nv40_state_validate_swtnl(struct nv40_context *nv40) +{ + struct draw_context *draw = nv40->draw; + + /* Setup for swtnl */ + if (nv40->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); + nv40->pipe.flush(&nv40->pipe, 0, NULL); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS); + nv40->render_mode = SWTNL; + } + + if (nv40->draw_dirty & NV40_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nv40->vertprog->draw); + + if (nv40->draw_dirty & NV40_NEW_RAST) + draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + + if (nv40->draw_dirty & NV40_NEW_UCP) + draw_set_clip_state(draw, &nv40->clip); + + if (nv40->draw_dirty & NV40_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nv40->viewport); + + if (nv40->draw_dirty & NV40_NEW_ARRAYS) { + draw_set_edgeflags(draw, nv40->edgeflags); + draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); + draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); + } + + nv40_state_do_validate(nv40, swtnl_states); + if (nv40->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); + return FALSE; + } + + nv40->draw_dirty = 0; + return TRUE; +} + diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 0000000000..28592d71c3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,155 @@ +#include "nv40_context.h" +#include "nouveau/nouveau_util.h" + +static boolean +nv40_state_framebuffer_validate(struct nv40_context *nv40) +{ + struct pipe_framebuffer_state *fb = &nv40->framebuffer; + struct pipe_surface *rt[4], *zeta; + uint32_t rt_enable, rt_format; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + + rt_enable = 0; + for (i = 0; i < fb->num_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | + NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV40TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->num_cbufs; i++) + assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | + log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | + log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; + } + else + rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; + break; + 
default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); + so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); + so_data (so, rt[0]->stride); + so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); + so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); + so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->stride); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->stride); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->stride); + } + + if (zeta_format) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); + so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); + so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->stride); + } + + so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, nv40->screen->curie, 0x1d88, 1); + so_data (so, (1 << 12) | h); + + so_ref(so, &nv40->state.hw[NV40_STATE_FB]); + return TRUE; +} + +struct nv40_state_entry nv40_state_framebuffer = { + .validate = nv40_state_framebuffer_validate, + .dirty = { + .pipe = NV40_NEW_FB, + .hw = NV40_STATE_FB + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c new file mode 100644 index 0000000000..9ecda5990f --- /dev/null +++ 
b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_rasterizer_validate(struct nv40_context *nv40) +{ + so_ref(nv40->rasterizer->so, + &nv40->state.hw[NV40_STATE_RAST]); + return TRUE; +} + +struct nv40_state_entry nv40_state_rasterizer = { + .validate = nv40_state_rasterizer_validate, + .dirty = { + .pipe = NV40_NEW_RAST, + .hw = NV40_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c new file mode 100644 index 0000000000..285239ef41 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv40_context.h" + +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_scissor_state *s = &nv40->scissor; + struct nouveau_stateobj *so; + + if (nv40->state.hw[NV40_STATE_SCISSOR] && + (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) + return FALSE; + nv40->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); + if (nv40->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); + return TRUE; +} + +struct nv40_state_entry nv40_state_scissor = { + .validate = nv40_state_scissor_validate, + .dirty = { + .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, + .hw = NV40_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c new file mode 100644 index 0000000000..b51024ad9b --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv40_context.h" + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nouveau_stateobj *so; + + if (nv40->state.hw[NV40_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv40->stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); + return TRUE; +} + +struct nv40_state_entry nv40_state_stipple = { + .validate = nv40_state_stipple_validate, + .dirty = { + .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, + .hw = NV40_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c new file mode 100644 index 0000000000..869a55b405 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -0,0 +1,67 @@ +#include "nv40_context.h" + +static boolean +nv40_state_viewport_validate(struct nv40_context *nv40) +{ + struct pipe_viewport_state *vpt = &nv40->viewport; + struct nouveau_stateobj *so; + unsigned bypass; + + if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv40->state.hw[NV40_STATE_VIEWPORT] && + (bypass || 
!(nv40->dirty & NV40_NEW_VIEWPORT)) && + nv40->state.viewport_bypass == bypass) + return FALSE; + nv40->state.viewport_bypass = bypass; + + so = so_new(11, 0); + if (!bypass) { + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 1); + } else { + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(0.0)); + /* Not entirely certain what this is yet. The DDX uses this + * value also as it fixes rendering when you pass + * pre-transformed vertices to the GPU. My best guess is that + * this bypasses some culling/clipping stage. Might be worth + * noting that points/lines are unaffected by whatever this + * value fixes, only filled polygons are affected. + */ + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 0x110); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); + return TRUE; +} + +struct nv40_state_entry nv40_state_viewport = { + .validate = nv40_state_viewport_validate, + .dirty = { + .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST, + .hw = NV40_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c new file mode 100644 index 0000000000..fb760677c8 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_zsa_validate(struct nv40_context *nv40) +{ + so_ref(nv40->zsa->so, + &nv40->state.hw[NV40_STATE_ZSA]); + return TRUE; +} + +struct nv40_state_entry nv40_state_zsa = { + .validate = nv40_state_zsa_validate, + .dirty = { + .pipe = NV40_NEW_ZSA, + .hw = NV40_STATE_ZSA + } +}; diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c new file mode 100644 index 0000000000..576af7c59e --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -0,0 +1,77 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "util/u_tile.h" + +static void +nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_winsys *nvws = nv40->nvws; + + if (do_flip) { + /*XXX: This dodgyness will do for now for correctness. But, + * need to investigate whether the 2D engine is able to + * manage a flip (perhaps SIFM?), if not, use the 3D engine + */ + desty += height; + while (height--) { + nvws->surface_copy(nvws, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); + } +} + +static void +nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_winsys *nvws = nv40->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ + nv40->pipe.surface_copy = nv40_surface_copy; + nv40->pipe.surface_fill = nv40_surface_fill; +} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c new file mode 100644 index 0000000000..09f6e79d32 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -0,0 +1,555 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV40TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV40TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", 
pf_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, + unsigned ib_size) +{ + struct pipe_screen *pscreen = &nv40->screen->pipe; + unsigned type; + + if (!ib) { + nv40->idxbuf = NULL; + nv40->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv40->idxbuf || + type != nv40->idxbuf_format) { + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->idxbuf = ib; + nv40->idxbuf_format = type; + } + + return TRUE; +} + +static boolean +nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, + int attrib, struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; + unsigned type, ncomp; + void *map; + + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV40TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + switch (ncomp) { + case 4: + so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); + break; + case 3: + so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + break; + case 2: + so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + break; + case 1: + so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +boolean +nv40_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; + + nv40_vbo_set_idxbuf(nv40, NULL, 0); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + + while (count) { + unsigned vc, nr; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 
2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static boolean +nv40_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map; + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return FALSE; + } + + switch (ib_size) { + case 1: + nv40_draw_elements_u08(nv40, map, mode, 
start, count); + break; + case 2: + nv40_draw_elements_u16(nv40, map, mode, start, count); + break; + case 4: + nv40_draw_elements_u32(nv40, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + ws->buffer_unmap(ws, ib); + return TRUE; +} + +static boolean +nv40_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; + + while (count) { + unsigned nr, vc; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + return TRUE; +} + +boolean +nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + boolean idxbuf; + + idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + + if (idxbuf) { + nv40_draw_elements_vbo(pipe, mode, start, count); + } else { + nv40_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static boolean +nv40_vbo_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *curie = nv40->screen->curie; + struct pipe_buffer *ib = nv40->idxbuf; + unsigned ib_format = nv40->idxbuf_format; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + if (nv40->edgeflags) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + return FALSE; + } + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); + + for (hw = 0; hw < nv40->vtxelt_nr; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->pitch) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV40TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, 
ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, curie, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); + so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); + so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); + return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { + .validate = nv40_vbo_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS, + .hw = 0, + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c new file mode 100644 index 0000000000..1392fe956f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -0,0 +1,1070 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nv40_vpc { + struct nv40_vertex_program *vp; + + struct nv40_vertex_program_exec *vpi; + + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_address; + struct nv40_sreg *r_temp; + + struct nv40_sreg *imm; + unsigned nr_imm; + + unsigned hpos_idx; +}; + +static struct nv40_sreg +temp(struct nv40_vpc *vpc) +{ + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + +static struct nv40_sreg +constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv40_vertex_program *vp = vpc->vp; + struct nv40_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv40_sr(NV40SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_TEMP: + sr |= 
(NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV40SR_INPUT: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + break; + case NV40SR_CONST: + sr |= (NV40_VP_SRC_REG_TYPE_CONST << + NV40_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV40SR_NONE: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + + switch (pos) { + case 0: + hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> + NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << + NV40_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> + NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << + NV40_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ + struct nv40_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV40SR_TEMP: + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + break; + case NV40SR_OUTPUT: + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + case NV40_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= 
NV40TCL_CLIP_PLANE_ENABLE_PLANE5; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + default: + break; + } + + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + break; + default: + assert(0); + } +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, + struct nv40_sreg s2) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = vpc->r_temp[fsrc->SrcRegister.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv40_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = vpc->r_result[fdst->DstRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + dst = vpc->r_temp[fdst->DstRegister.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->DstRegister.Index]; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, 
+ fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= tgsi_mask(1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= tgsi_mask(1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(vpc); + + if (mask) + arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(vpc); + arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); + arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv40_sreg src[3], dst, tmp; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int mask; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(vpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case 
TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(vpc); + return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->DeclarationRange.First; + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_VP_INST_DEST_POS; + vpc->hpos_idx = idx; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV40_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if 
(fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + return TRUE; +} + +static boolean +nv40_vertprog_prepare(struct nv40_vpc *vpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, high_addr = -1, nr_imm = 0, i; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break; +#if 0 /* this would be nice.. except gallium doesn't track it */ + case TGSI_FILE_ADDRESS: + if (fdec->DeclarationRange.Last > high_addr) { + high_addr = + fdec->DeclarationRange.Last; + } + break; +#endif + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + return FALSE; + break; + default: + break; + } + } + break; +#if 1 /* yay, parse instructions looking for address regs instead */ + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + const struct tgsi_full_dst_register *fdst; + + finst = &p.FullToken.FullInstruction; + fdst = &finst->FullDstRegisters[0]; + + if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { + if (fdst->DstRegister.Index > high_addr) + high_addr = fdst->DstRegister.Index; + } + + } + break; +#endif + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + assert(vpc->imm); + } + + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = temp(vpc); + } + + vpc->r_temps_discard = 0; + return TRUE; +} + +static void +nv40_vertprog_translate(struct nv40_context *nv40, + struct nv40_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv40_vpc *vpc = NULL; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int i; + + vpc = CALLOC(1, sizeof(struct nv40_vpc)); + if (!vpc) + return; + vpc->vp = vp; + + if (!nv40_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code the the vtxprog + * to handle clip planes later. 
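For reference, the user-clip-plane path this comment introduces produces one signed distance per enabled plane: the post-transform position (redirected to a temporary when vp->ucp.nr is set, as the code just below does) is DP4'd against each plane equation and steered into a spare Y/Z/W component of the FOGC or PSZ output, matching the NV40_VP_INST_DEST_CLIP() cases handled in emit_dst() earlier. A standalone C sketch of the value each appended DP4 computes; clip_distance() is a hypothetical helper, not driver code:

/* One signed distance per enabled user clip plane, taken against the
 * homogeneous clip-space position. */
static float
clip_distance(const float plane[4], const float hpos[4])
{
        return plane[0] * hpos[0] + plane[1] * hpos[1] +
               plane[2] * hpos[2] + plane[3] * hpos[3];
}
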
+ */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +// assert(imm->Immediate.Size == 4); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv40_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { + struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_POS); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_CLIP(i)); + struct nv40_sreg ceqn = constant(vpc, -1, + nv40->clip.ucp[i][0], + nv40->clip.ucp[i][1], + nv40->clip.ucp[i][2], + nv40->clip.ucp[i][3]); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (vpc->r_temp) + FREE(vpc->r_temp); + if (vpc->r_address) + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); + FREE(vpc); +} + +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) +{ + struct nouveau_winsys *nvws = nv40->nvws; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + if (nv40->render_mode == HW) { + vp = nv40->vertprog; + constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + + if ((nv40->dirty & NV40_NEW_UCP) || + memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { + nv40_vertprog_destroy(nv40, vp); + memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + } + } else { + vp = nv40->swtnl.vertprog; + constbuf = NULL; + } + + /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; + nv40_vertprog_translate(nv40, vp); + if (!vp->translated) { + nv40->fallback_swtnl |= NV40_NEW_VERTPROG; + return FALSE; + } + +check_gpu_resources: + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv40->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if 
(nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(7, 0); + so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + so_data (so, vp->clip_ctrl); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv40->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV40_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv40_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) + ws->buffer_unmap(ws, constbuf); + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { + so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv40->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + 
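Because the constant-heap placement can change (the eviction loops above free other programs' slots until the allocation fits), each instruction records only the program-relative constant slot it reads (const_index, set in emit_src()), and the absolute slot is re-patched into the instruction word whenever vp->data->start moves. A minimal sketch of that relocation, assuming only the NV40_VP_INST_CONST_SRC_* definitions from the driver headers; the helper name is illustrative:

/* Rewrite one instruction's constant reference after the program's
 * const block has been placed at new_base in the hardware heap. */
static void
patch_const_slot(uint32_t *inst_word1, int const_index, unsigned new_base)
{
        *inst_word1 &= ~NV40_VP_INST_CONST_SRC_MASK;
        *inst_word1 |= (const_index + new_base) << NV40_VP_INST_CONST_SRC_SHIFT;
}
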
vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); +} + +struct nv40_state_entry nv40_state_vertprog = { + .validate = nv40_vertprog_validate, + .dirty = { + .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, + .hw = NV40_STATE_VERTPROG, + } +}; + diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile new file mode 100644 index 0000000000..be30400c03 --- /dev/null +++ b/src/gallium/drivers/nv50/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv50 + +DRIVER_SOURCES = \ + nv50_clear.c \ + nv50_context.c \ + nv50_draw.c \ + nv50_miptree.c \ + nv50_query.c \ + nv50_program.c \ + nv50_screen.c \ + nv50_state.c \ + nv50_state_validate.c \ + nv50_surface.c \ + nv50_tex.c \ + nv50_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c new file mode 100644 index 0000000000..a31a42d6b5 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -0,0 +1,90 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
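nv40_state_vertprog above, like nv40_state_vbo earlier, is one entry in the driver's state-tracking table: validate() runs whenever any of the listed pipe-level dirty bits are set, and the hw field names the hardware state slot it produces. A plausible shape for the loop that consumes such entries is sketched below; it is illustrative only, as the actual nv40_state_validate()/nv40_state_emit() live elsewhere in the driver and are not part of this excerpt:

/* Sketch: run every state entry whose pipe-level dirty bits are set,
 * assuming the entries have been collected into a simple array. */
static void
run_state_entries(struct nv40_context *nv40,
                  struct nv40_state_entry **entries, unsigned nr)
{
        unsigned i;

        for (i = 0; i < nr; i++) {
                struct nv40_state_entry *e = entries[i];

                if (nv40->dirty & e->dirty.pipe)
                        e->validate(nv40);
        }
}
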
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +void +nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer; + struct pipe_scissor_state sc, s_sc = nv50->scissor; + unsigned dirty = nv50->dirty; + + nv50->dirty = 0; + + if (ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM) { + fb.num_cbufs = 0; + fb.zsbuf = ps; + } else { + fb.num_cbufs = 1; + fb.cbufs[0] = ps; + fb.zsbuf = NULL; + } + fb.width = ps->width; + fb.height = ps->height; + pipe->set_framebuffer_state(pipe, &fb); + + sc.minx = sc.miny = 0; + sc.maxx = fb.width; + sc.maxy = fb.height; + pipe->set_scissor_state(pipe, &sc); + + nv50_state_validate(nv50); + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + BEGIN_RING(tesla, 0x0d80, 4); + OUT_RINGf (ubyte_to_float((clearValue >> 16) & 0xff)); + OUT_RINGf (ubyte_to_float((clearValue >> 8) & 0xff)); + OUT_RINGf (ubyte_to_float((clearValue >> 0) & 0xff)); + OUT_RINGf (ubyte_to_float((clearValue >> 24) & 0xff)); + BEGIN_RING(tesla, 0x19d0, 1); + OUT_RING (0x3c); + break; + case PIPE_FORMAT_Z24S8_UNORM: + BEGIN_RING(tesla, 0x0d90, 1); + OUT_RINGf ((float)(clearValue >> 8) * (1.0 / 16777215.0)); + BEGIN_RING(tesla, 0x0da0, 1); + OUT_RING (clearValue & 0xff); + BEGIN_RING(tesla, 0x19d0, 1); + OUT_RING (0x03); + break; + default: + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + clearValue); + break; + } + + pipe->set_framebuffer_state(pipe, &s_fb); + pipe->set_scissor_state(pipe, &s_sc); + nv50->dirty |= dirty; + + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} + diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c new file mode 100644 index 0000000000..b02c53f209 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -0,0 +1,90 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +static void +nv50_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + + FIRE_RING(fence); +} + +static void +nv50_destroy(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + + draw_destroy(nv50->draw); + FREE(nv50); +} + + +static void +nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *nv50; + + nv50 = CALLOC_STRUCT(nv50_context); + if (!nv50) + return NULL; + nv50->screen = screen; + nv50->pctx_id = pctx_id; + + nv50->pipe.winsys = pipe_winsys; + nv50->pipe.screen = pscreen; + + nv50->pipe.destroy = nv50_destroy; + + nv50->pipe.set_edgeflags = nv50_set_edgeflags; + nv50->pipe.draw_arrays = nv50_draw_arrays; + nv50->pipe.draw_elements = nv50_draw_elements; + nv50->pipe.clear = nv50_clear; + + nv50->pipe.flush = nv50_flush; + + nv50_init_surface_functions(nv50); + nv50_init_state_functions(nv50); + nv50_init_query_functions(nv50); + + nv50->draw = draw_create(); + assert(nv50->draw); + draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + + return &nv50->pipe; +} + + diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h new file mode 100644 index 0000000000..061a4c064b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -0,0 +1,199 @@ +#ifndef __NV50_CONTEXT_H__ +#define __NV50_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_stateobj.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv50_screen *ctx = nv50->screen +#include "nouveau/nouveau_push.h" + +#include "nv50_screen.h" +#include "nv50_program.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) 
\ + fprintf(stderr, "nouveau: "fmt, ##args); + +/* Constant buffer assignment */ +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 +#define NV50_CB_PFP 2 +#define NV50_CB_PGP 3 +#define NV50_CB_TIC 4 +#define NV50_CB_TSC 5 +#define NV50_CB_PUPLOAD 6 + +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_ZSA (1 << 1) +#define NV50_NEW_BLEND_COLOUR (1 << 2) +#define NV50_NEW_STIPPLE (1 << 3) +#define NV50_NEW_SCISSOR (1 << 4) +#define NV50_NEW_VIEWPORT (1 << 5) +#define NV50_NEW_RASTERIZER (1 << 6) +#define NV50_NEW_FRAMEBUFFER (1 << 7) +#define NV50_NEW_VERTPROG (1 << 8) +#define NV50_NEW_VERTPROG_CB (1 << 9) +#define NV50_NEW_FRAGPROG (1 << 10) +#define NV50_NEW_FRAGPROG_CB (1 << 11) +#define NV50_NEW_ARRAYS (1 << 12) +#define NV50_NEW_SAMPLER (1 << 13) +#define NV50_NEW_TEXTURE (1 << 14) + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_miptree_level { + struct pipe_buffer **image; + int *image_offset; + unsigned image_dirty_cpu[512/32]; + unsigned image_dirty_gpu[512/32]; +}; + +struct nv50_miptree { + struct pipe_texture base; + struct pipe_buffer *buffer; + + struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS]; + int image_nr; + int total_size; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ + return (struct nv50_miptree *)pt; +} + +struct nv50_surface { + struct pipe_surface base; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *pt) +{ + return (struct nv50_surface *)pt; +} + +struct nv50_state { + unsigned dirty; + + struct nouveau_stateobj *fb; + struct nouveau_stateobj *blend; + struct nouveau_stateobj *blend_colour; + struct nouveau_stateobj *zsa; + struct nouveau_stateobj *rast; + struct nouveau_stateobj *stipple; + struct nouveau_stateobj *scissor; + unsigned scissor_enabled; + struct nouveau_stateobj *viewport; + unsigned viewport_bypass; + struct nouveau_stateobj *tsc_upload; + struct nouveau_stateobj *tic_upload; + struct nouveau_stateobj *vertprog; + struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *vtxfmt; + struct nouveau_stateobj *vtxbuf; +}; + +struct nv50_context { + struct pipe_context pipe; + + struct nv50_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + struct nv50_state state; + + unsigned dirty; + struct nv50_blend_stateobj *blend; + struct nv50_zsa_stateobj *zsa; + struct nv50_rasterizer_stateobj *rasterizer; + struct pipe_blend_color blend_colour; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct nv50_program *vertprog; + struct nv50_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + unsigned *sampler[PIPE_MAX_SAMPLERS]; + unsigned sampler_nr; + struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; + unsigned miptree_nr; +}; + +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ + return (struct nv50_context *)pipe; +} + +extern void nv50_init_surface_functions(struct nv50_context *nv50); +extern void nv50_init_state_functions(struct nv50_context *nv50); +extern void 
nv50_init_query_functions(struct nv50_context *nv50); + +extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); + +/* nv50_vbo.c */ +extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); +extern void nv50_vbo_validate(struct nv50_context *nv50); + +/* nv50_clear.c */ +extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv50_program.c */ +extern void nv50_vertprog_validate(struct nv50_context *nv50); +extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); + +/* nv50_state_validate.c */ +extern boolean nv50_state_validate(struct nv50_context *nv50); + +/* nv50_tex.c */ +extern void nv50_tex_validate(struct nv50_context *); + +/* nv50_miptree.c */ +extern void nv50_miptree_sync(struct pipe_screen *, struct nv50_miptree *, + unsigned level, unsigned image); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c new file mode 100644 index 0000000000..2f6f607261 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -0,0 +1,89 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
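State changes in the nv50 driver follow the convention visible in nv50_context.h above: each pipe-level bind or set hook stores the new state on the context and raises one of the NV50_NEW_* bits in nv50->dirty, which the validation pass later consumes. A hypothetical bind hook illustrating the pattern; the real CSO hooks live in nv50_state.c (listed in the Makefile above) and may differ in detail:

/* Hypothetical blend-bind hook; only nv50_context(), the blend field
 * and NV50_NEW_BLEND are taken from the header above. */
static void
example_blend_state_bind(struct pipe_context *pipe, void *hwcso)
{
        struct nv50_context *nv50 = nv50_context(pipe);

        nv50->blend = hwcso;            /* struct nv50_blend_stateobj * */
        nv50->dirty |= NV50_NEW_BLEND;  /* picked up at validation time */
}
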
+ */ + +#include "draw/draw_pipe.h" + +#include "nv50_context.h" + +struct nv50_render_stage { + struct draw_stage stage; + struct nv50_context *nv50; +}; + +static INLINE struct nv50_render_stage * +nv50_render_stage(struct draw_stage *stage) +{ + return (struct nv50_render_stage *)stage; +} + +static void +nv50_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nv50_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nv50_draw_render_stage(struct nv50_context *nv50) +{ + struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); + + rs->nv50 = nv50; + rs->stage.draw = nv50->draw; + rs->stage.destroy = nv50_render_destroy; + rs->stage.point = nv50_render_point; + rs->stage.line = nv50_render_line; + rs->stage.tri = nv50_render_tri; + rs->stage.flush = nv50_render_flush; + rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; + + return &rs->stage; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c new file mode 100644 index 0000000000..63a23d06b8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -0,0 +1,297 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +static struct pipe_texture * +nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_texture *pt = &mt->base; + unsigned usage, width = tmp->width[0], height = tmp->height[0]; + unsigned depth = tmp->depth[0]; + int i, l; + + mt->base = *tmp; + mt->base.refcount = 1; + mt->base.screen = pscreen; + + usage = PIPE_BUFFER_USAGE_PIXEL; + switch (pt->format) { + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + usage |= NOUVEAU_BUFFER_USAGE_ZETA; + break; + default: + break; + } + + switch (pt->target) { + case PIPE_TEXTURE_3D: + mt->image_nr = pt->depth[0]; + break; + case PIPE_TEXTURE_CUBE: + mt->image_nr = 6; + break; + default: + mt->image_nr = 1; + break; + } + + for (l = 0; l <= pt->last_level; l++) { + struct nv50_miptree_level *lvl = &mt->level[l]; + + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); + lvl->image = CALLOC(mt->image_nr, sizeof(struct pipe_buffer *)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (i = 0; i < mt->image_nr; i++) { + for (l = 0; l <= pt->last_level; l++) { + struct nv50_miptree_level *lvl = &mt->level[l]; + int size; + + size = align(pt->width[l], 8) * pt->block.size; + size = align(size, 64); + size *= align(pt->height[l], 8) * pt->block.size; + + lvl->image[i] = ws->buffer_create(ws, 256, 0, size); + lvl->image_offset[i] = mt->total_size; + + mt->total_size += size; + } + } + + mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static INLINE void +mark_dirty(uint32_t *flags, unsigned image) +{ + flags[image / 32] |= (1 << (image % 32)); +} + +static INLINE void +mark_clean(uint32_t *flags, unsigned image) +{ + flags[image / 32] &= ~(1 << (image % 32)); +} + +static INLINE int +is_dirty(uint32_t *flags, unsigned image) +{ + return !!(flags[image / 32] & (1 << (image % 32))); +} + +static void +nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + + *ppt = NULL; + + if (--pt->refcount <= 0) { + struct nv50_miptree *mt = nv50_miptree(pt); + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + FREE(mt); + } +} + +void +nv50_miptree_sync(struct pipe_screen *pscreen, struct nv50_miptree *mt, + unsigned level, unsigned image) +{ + struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; + struct nv50_miptree_level *lvl = &mt->level[level]; + struct pipe_surface *dst, *src; + unsigned face = 0, zslice = 0; + + if (!is_dirty(lvl->image_dirty_cpu, image)) + return; + + if (mt->base.target == PIPE_TEXTURE_CUBE) + face = image; + else + if (mt->base.target == PIPE_TEXTURE_3D) + zslice = image; + + /* Mark as clean already - so we don't continually call this function + * trying to get a GPU_WRITE pipe_surface! + */ + mark_clean(lvl->image_dirty_cpu, image); + + /* Pretend we're doing CPU access so we get the backing pipe_surface + * and not a view into the larger miptree. 
+ */ + src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + + /* Pretend we're only reading with the GPU so surface doesn't get marked + * as dirtied by the GPU. + */ + dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_GPU_READ); + + nvws->surface_copy(nvws, dst, 0, 0, src, 0, 0, dst->width, dst->height); + + pscreen->tex_surface_release(pscreen, &dst); + pscreen->tex_surface_release(pscreen, &src); +} + +/* The reverse of the above */ +void +nv50_miptree_sync_cpu(struct pipe_screen *pscreen, struct nv50_miptree *mt, + unsigned level, unsigned image) +{ + struct nouveau_winsys *nvws = nv50_screen(pscreen)->nvws; + struct nv50_miptree_level *lvl = &mt->level[level]; + struct pipe_surface *dst, *src; + unsigned face = 0, zslice = 0; + + if (!is_dirty(lvl->image_dirty_gpu, image)) + return; + + if (mt->base.target == PIPE_TEXTURE_CUBE) + face = image; + else + if (mt->base.target == PIPE_TEXTURE_3D) + zslice = image; + + mark_clean(lvl->image_dirty_gpu, image); + + src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_GPU_READ); + dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + + nvws->surface_copy(nvws, dst, 0, 0, src, 0, 0, dst->width, dst->height); + + pscreen->tex_surface_release(pscreen, &dst); + pscreen->tex_surface_release(pscreen, &src); +} + +static struct pipe_surface * +nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_miptree_level *lvl = &mt->level[level]; + struct nv50_surface *s; + struct pipe_surface *ps; + int img; + + if (pt->target == PIPE_TEXTURE_CUBE) + img = face; + else + if (pt->target == PIPE_TEXTURE_3D) + img = zslice; + else + img = 0; + + s = CALLOC_STRUCT(nv50_surface); + if (!s) + return NULL; + ps = &s->base; + + ps->refcount = 1; + ps->winsys = pscreen->winsys; + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = ps->width * ps->block.size; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + nv50_miptree_sync_cpu(pscreen, mt, level, img); + + ps->offset = 0; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(pscreen, &ps->buffer, lvl->image[img]); + + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + mark_dirty(lvl->image_dirty_cpu, img); + } else { + nv50_miptree_sync(pscreen, mt, level, img); + + ps->offset = lvl->image_offset[img]; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); + + if (flags & PIPE_BUFFER_USAGE_GPU_WRITE) + mark_dirty(lvl->image_dirty_gpu, img); + } + + return ps; +} + +static void +nv50_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + struct nv50_surface *s = nv50_surface(ps); + + *psurface = NULL; + + if (--ps->refcount <= 0) { + pipe_texture_reference(&ps->texture, NULL); + pipe_buffer_reference(pscreen, &ps->buffer, NULL); + FREE(s); + } +} + +void +nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv50_miptree_create; + 
pscreen->texture_release = nv50_miptree_release; + pscreen->get_tex_surface = nv50_miptree_surface_new; + pscreen->tex_surface_release = nv50_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c new file mode 100644 index 0000000000..7686f746eb --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -0,0 +1,1779 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv50_context.h" + +#define NV50_SU_MAX_TEMP 64 +//#define NV50_PROGRAM_DUMP + +/* ARL - gallium craps itself on progs/vp/arl.txt + * + * MSB - Like MAD, but MUL+SUB + * - Fuck it off, introduce a way to negate args for ops that + * support it. + * + * Look into inlining IMMD for ops other than MOV (make it general?) + * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, + * but can emit to P_TEMP first - then MOV later. NVIDIA does this + * + * In ops such as ADD it's possible to construct a bad opcode in the !is_long() + * case, if the emit_src() causes the inst to suddenly become long. + * + * Verify half-insns work where expected - and force disable them where they + * don't work - MUL has it forcibly disabled atm as it fixes POW.. + * + * FUCK! watch dst==src vectors, can overwrite components that are needed. + * ie. SUB R0, R0.yzxw, R0 + * + * Things to check with renouveau: + * FP attr/result assignment - how? 
+ * attrib + * - 0x16bc maps vp output onto fp hpos + * - 0x16c0 maps vp output onto fp col0 + * result + * - colr always 0-3 + * - depr always 4 + * 0x16bc->0x16e8 --> some binding between vp/fp regs + * 0x16b8 --> VP output count + * + * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 + * "MOV rcol.x, fcol.y" = 0x00000004 + * 0x19a8 --> as above but 0x00000100 and 0x00000000 + * - 0x00100000 used when KIL used + * 0x196c --> as above but 0x00000011 and 0x00000000 + * + * 0x1988 --> 0xXXNNNNNN + * - XX == FP high something + */ +struct nv50_reg { + enum { + P_TEMP, + P_ATTR, + P_RESULT, + P_CONST, + P_IMMD + } type; + int index; + + int hw; + int neg; +}; + +struct nv50_pc { + struct nv50_program *p; + + /* hw resources */ + struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + + /* tgsi resources */ + struct nv50_reg *temp; + int temp_nr; + struct nv50_reg *attr; + int attr_nr; + struct nv50_reg *result; + int result_nr; + struct nv50_reg *param; + int param_nr; + struct nv50_reg *immd; + float *immd_buf; + int immd_nr; + + struct nv50_reg *temp_temp[16]; + unsigned temp_temp_nr; +}; + +static void +alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) +{ + int i; + + if (reg->type == P_RESULT) { + if (pc->p->cfg.high_result < (reg->hw + 1)) + pc->p->cfg.high_result = reg->hw + 1; + } + + if (reg->type != P_TEMP) + return; + + if (reg->hw >= 0) { + /*XXX: do this here too to catch FP temp-as-attr usage.. + * not clean, but works */ + if (pc->p->cfg.high_temp < (reg->hw + 1)) + pc->p->cfg.high_temp = reg->hw + 1; + return; + } + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!(pc->r_temp[i])) { + pc->r_temp[i] = reg; + reg->hw = i; + if (pc->p->cfg.high_temp < (i + 1)) + pc->p->cfg.high_temp = i + 1; + return; + } + } + + assert(0); +} + +static struct nv50_reg * +alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) +{ + struct nv50_reg *r; + int i; + + if (dst && dst->type == P_TEMP && dst->hw == -1) + return dst; + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!pc->r_temp[i]) { + r = CALLOC_STRUCT(nv50_reg); + r->type = P_TEMP; + r->index = -1; + r->hw = i; + pc->r_temp[i] = r; + return r; + } + } + + assert(0); + return NULL; +} + +static void +free_temp(struct nv50_pc *pc, struct nv50_reg *r) +{ + if (r->index == -1) { + unsigned hw = r->hw; + + FREE(pc->r_temp[hw]); + pc->r_temp[hw] = NULL; + } +} + +static int +alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) +{ + int i; + + if ((idx + 4) >= NV50_SU_MAX_TEMP) + return 1; + + if (pc->r_temp[idx] || pc->r_temp[idx + 1] || + pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) + return alloc_temp4(pc, dst, idx + 1); + + for (i = 0; i < 4; i++) { + dst[i] = CALLOC_STRUCT(nv50_reg); + dst[i]->type = P_TEMP; + dst[i]->index = -1; + dst[i]->hw = idx + i; + pc->r_temp[idx + i] = dst[i]; + } + + return 0; +} + +static void +free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) +{ + int i; + + for (i = 0; i < 4; i++) + free_temp(pc, reg[i]); +} + +static struct nv50_reg * +temp_temp(struct nv50_pc *pc) +{ + if (pc->temp_temp_nr >= 16) + assert(0); + + pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + return pc->temp_temp[pc->temp_temp_nr++]; +} + +static void +kill_temp_temp(struct nv50_pc *pc) +{ + int i; + + for (i = 0; i < pc->temp_temp_nr; i++) + free_temp(pc, pc->temp_temp[i]); + pc->temp_temp_nr = 0; +} + +static int +ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) +{ + pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), + (pc->immd_nr + 1) * 4 * sizeof(float)); + 
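+ /* Note (added): each immediate occupies a packed float[4] slot; the vec4
+  * index returned below is scaled by 4 in alloc_immd() to address a single
+  * component. */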
pc->immd_buf[(pc->immd_nr * 4) + 0] = x; + pc->immd_buf[(pc->immd_nr * 4) + 1] = y; + pc->immd_buf[(pc->immd_nr * 4) + 2] = z; + pc->immd_buf[(pc->immd_nr * 4) + 3] = w; + + return pc->immd_nr++; +} + +static struct nv50_reg * +alloc_immd(struct nv50_pc *pc, float f) +{ + struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + unsigned hw; + + hw = ctor_immd(pc, f, 0, 0, 0) * 4; + r->type = P_IMMD; + r->hw = hw; + r->index = -1; + return r; +} + +static struct nv50_program_exec * +exec(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); + + e->param.index = -1; + return e; +} + +static void +emit(struct nv50_pc *pc, struct nv50_program_exec *e) +{ + struct nv50_program *p = pc->p; + + if (p->exec_tail) + p->exec_tail->next = e; + if (!p->exec_head) + p->exec_head = e; + p->exec_tail = e; + p->exec_size += (e->inst[0] & 1) ? 2 : 1; +} + +static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); + +static boolean +is_long(struct nv50_program_exec *e) +{ + if (e->inst[0] & 1) + return TRUE; + return FALSE; +} + +static boolean +is_immd(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 3) + return TRUE; + return FALSE; +} + +static INLINE void +set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, + struct nv50_program_exec *e) +{ + set_long(pc, e); + e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); + e->inst[1] |= (pred << 7) | (idx << 12); +} + +static INLINE void +set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, + struct nv50_program_exec *e) +{ + set_long(pc, e); + e->inst[1] &= ~((0x3 << 4) | (1 << 6)); + e->inst[1] |= (idx << 4) | (on << 6); +} + +static INLINE void +set_long(struct nv50_pc *pc, struct nv50_program_exec *e) +{ + if (is_long(e)) + return; + + e->inst[0] |= 1; + set_pred(pc, 0xf, 0, e); + set_pred_wr(pc, 0, 0, e); +} + +static INLINE void +set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) +{ + if (dst->type == P_RESULT) { + set_long(pc, e); + e->inst[1] |= 0x00000008; + } + + alloc_reg(pc, dst); + e->inst[0] |= (dst->hw << 2); +} + +static INLINE void +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) +{ + unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ + + set_long(pc, e); + /*XXX: can't be predicated - bits overlap.. catch cases where both + * are required and avoid them. 
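+ * (set_pred() writes inst[1] bits 7-13, which sit inside the immediate
+ * payload packed into inst[1] here, so the two can't coexist.)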
*/ + set_pred(pc, 0, 0, e); + set_pred_wr(pc, 0, 0, e); + + e->inst[1] |= 0x00000002 | 0x00000001; + e->inst[0] |= (val & 0x3f) << 16; + e->inst[1] |= (val >> 6) << 2; +} + +static void +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, struct nv50_reg *iv) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x80000000; + set_dst(pc, dst, e); + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 16); + if (iv) { + e->inst[0] |= (1 << 25); + alloc_reg(pc, iv); + e->inst[0] |= (iv->hw << 9); + } + + emit(pc, e); +} + +static void +set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, + struct nv50_program_exec *e) +{ + set_long(pc, e); +#if 1 + e->inst[1] |= (1 << 22); +#else + if (src->type == P_IMMD) { + e->inst[1] |= (NV50_CB_PMISC << 22); + } else { + if (pc->p->type == PIPE_SHADER_VERTEX) + e->inst[1] |= (NV50_CB_PVP << 22); + else + e->inst[1] |= (NV50_CB_PFP << 22); + } +#endif + + e->param.index = src->hw; + e->param.shift = s; + e->param.mask = m << (s % 32); +} + +static void +emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x10000000; + + set_dst(pc, dst, e); + + if (0 && dst->type != P_RESULT && src->type == P_IMMD) { + set_immd(pc, src, e); + /*XXX: 32-bit, but steals part of "half" reg space - need to + * catch and handle this case if/when we do half-regs + */ + e->inst[0] |= 0x00008000; + } else + if (src->type == P_IMMD || src->type == P_CONST) { + set_long(pc, e); + set_data(pc, src, 0x7f, 9, e); + e->inst[1] |= 0x20000000; /* src0 const? */ + } else { + if (src->type == P_ATTR) { + set_long(pc, e); + e->inst[1] |= 0x00200000; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); + } + + /* We really should support "half" instructions here at some point, + * but I don't feel confident enough about them yet. 
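+ * Until then every MOV is forced to the long encoding below.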
+ */ + set_long(pc, e); + if (is_long(e) && !is_immd(e)) { + e->inst[1] |= 0x04000000; /* 32-bit */ + e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + } + + emit(pc, e); +} + +static boolean +check_swap_src_0_1(struct nv50_pc *pc, + struct nv50_reg **s0, struct nv50_reg **s1) +{ + struct nv50_reg *src0 = *s0, *src1 = *s1; + + if (src0->type == P_CONST) { + if (src1->type != P_CONST) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } else + if (src1->type == P_ATTR) { + if (src0->type != P_ATTR) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } + + return FALSE; +} + +static void +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + if (src->type == P_ATTR) { + set_long(pc, e); + e->inst[1] |= 0x00200000; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); +} + +static void +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + assert(!(e->inst[0] & 0x00800000)); + if (e->inst[0] & 0x01000000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_data(pc, src, 0x7f, 16, e); + e->inst[0] |= 0x00800000; + } + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 16); +} + +static void +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + set_long(pc, e); + + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + assert(!(e->inst[0] & 0x01000000)); + if (e->inst[0] & 0x00800000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_data(pc, src, 0x7f, 32+14, e); + e->inst[0] |= 0x01000000; + } + } + + alloc_reg(pc, src); + e->inst[1] |= (src->hw << 14); +} + +static void +emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xc0000000; + set_long(pc, e); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + if (is_long(e)) + set_src_2(pc, src1, e); + else + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (sub << 29); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_long(pc, e); + if (check_swap_src_0_1(pc, &src0, &src1)) + e->inst[1] |= 0x04000000; + else + e->inst[1] 
|= 0x08000000; + + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_2(pc, src1, e); + + emit(pc, e); +} + +static void +emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xe0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xe0000000; + set_long(pc, e); + e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_flop(struct nv50_pc *pc, unsigned sub, + struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x90000000; + if (sub) { + set_long(pc, e); + e->inst[1] |= (sub << 29); + } + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29) | 0x00004000; + + emit(pc, e); +} + +static void +emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29); + + emit(pc, e); +} + +static void +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + struct nv50_reg *rdst; + + assert(c_op <= 7); + if (check_swap_src_0_1(pc, &src0, &src1)) + c_op = inv_cop[c_op]; + + rdst = dst; + if (dst->type != P_TEMP) + dst = alloc_temp(pc, NULL); + + /* set.u32 */ + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (3 << 29); + e->inst[1] |= (c_op << 14); + /*XXX: breaks things, .u32 by default? + * decuda will disasm as .u16 and use .lo/.hi regs, but this + * doesn't seem to match what the hw actually does. + inst[1] |= 0x04000000; << breaks things.. .u32 by default? 
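+ * The set.u32 result is then converted by the cvt.f32.u32 below, so the
+ * destination ends up as 0.0 / 1.0 as the SGE/SLT opcodes expect.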
+ */ + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + emit(pc, e); + + /* cvt.f32.u32 */ + e = exec(pc); + e->inst[0] = 0xa0000001; + e->inst[1] = 0x64014780; + set_dst(pc, rdst, e); + set_src_0(pc, dst, e); + emit(pc, e); + + if (dst != rdst) + free_temp(pc, dst); +} + +static void +emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x08000000; /* integer mode */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *v, struct nv50_reg *e) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + + emit_flop(pc, 3, temp, v); + emit_mul(pc, temp, temp, e); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, dst, temp); + + free_temp(pc, temp); +} + +static void +emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0); + struct nv50_reg *zero = alloc_immd(pc, 0.0); + struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); + struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); + struct nv50_reg *tmp[4]; + + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + + if (mask & (1 << 3)) + emit_mov(pc, dst[3], one); + + if (mask & (3 << 1)) { + if (mask & (1 << 1)) + tmp[0] = dst[1]; + else + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, tmp[0], src[0], zero); + } + + if (mask & (1 << 2)) { + set_pred_wr(pc, 1, 0, pc->p->exec_tail); + + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); + + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, pc->p->exec_tail); + } +} + +static void +emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[0] |= 0xa0000000; /* delta */ + e->inst[1] |= (7 << 29); /* delta */ + e->inst[1] |= 0x04000000; /* negate arg0? probably not */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_kil(struct nv50_pc *pc, struct nv50_reg *src) +{ + struct nv50_program_exec *e; + const int r_pred = 1; + + /* Sets predicate reg ? */ + e = exec(pc); + e->inst[0] = 0xa00001fd; + e->inst[1] = 0xc4014788; + set_src_0(pc, src, e); + set_pred_wr(pc, 1, r_pred, e); + emit(pc, e); + + /* This is probably KILP */ + e = exec(pc); + e->inst[0] = 0x000001fe; + set_long(pc, e); + set_pred(pc, 1 /* LT? 
*/, r_pred, e); + emit(pc, e); +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +{ + struct nv50_reg *r = NULL; + struct nv50_reg *temp; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); + switch (c) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_SAMPLER: + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; + default: + assert(0); + break; + } + + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_TOGGLE: + temp = temp_temp(pc); + emit_neg(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_SET: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + emit_neg(pc, temp, r); + r = temp; + break; + default: + assert(0); + break; + } + + return r; +} + +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ + const struct tgsi_full_instruction *inst = &tok->FullInstruction; + struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + unsigned mask, sat, unit; + int i, c; + + mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + else + dst[c] = NULL; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + + if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) + unit = fs->SrcRegister.Index; + + for (c = 0; c < 4; c++) + src[i][c] = tgsi_src(pc, c, fs); + } + + if (sat) { + for (c = 0; c < 4; c++) { + rdst[c] = dst[c]; + dst[c] = temp_temp(pc); + } + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_abs(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_COS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + break; + case TGSI_OPCODE_DP3: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + for (c = 0; c < 4; 
c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DP4: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_mad(pc, temp, src[0][3], src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DPH: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_add(pc, temp, src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DST: + { + struct nv50_reg *one = alloc_immd(pc, 1.0); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + if (mask & (1 << 1)) + emit_mul(pc, dst[1], src[0][1], src[1][1]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], src[0][2]); + if (mask & (1 << 3)) + emit_mov(pc, dst[3], src[1][3]); + FREE(one); + } + break; + case TGSI_OPCODE_EX2: + temp = alloc_temp(pc, NULL); + emit_preex2(pc, temp, src[0][0]); + emit_flop(pc, 6, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_FLR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_FRC: + temp = alloc_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, temp, src[0][c]); + emit_sub(pc, dst[c], src[0][c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_KIL: + emit_kil(pc, src[0][0]); + emit_kil(pc, src[0][1]); + emit_kil(pc, src[0][2]); + emit_kil(pc, src[0][3]); + break; + case TGSI_OPCODE_LIT: + emit_lit(pc, &dst[0], mask, &src[0][0]); + break; + case TGSI_OPCODE_LG2: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + break; + case TGSI_OPCODE_LRP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + /*XXX: we can do better than this */ + temp = alloc_temp(pc, NULL); + emit_neg(pc, temp, src[0][c]); + emit_mad(pc, temp, temp, src[2][c], src[2][c]); + emit_mad(pc, dst[c], src[0][c], src[1][c], temp); + free_temp(pc, temp); + } + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } + break; + case TGSI_OPCODE_MAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_MIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_MOV: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_POW: + temp = alloc_temp(pc, NULL); + emit_pow(pc, temp, src[0][0], src[1][0]); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + 
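+ /* Note (added): POW above is expanded by emit_pow() into
+  * 2^(src1 * log2(src0)) -- flop subop 3 (lg2, as in LG2), MUL, PREEX2,
+  * then flop subop 6 (ex2, as in EX2) -- with the scalar result broadcast
+  * to the write-masked channels. */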
break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flop(pc, 0, dst[c], src[0][0]); + } + break; + case TGSI_OPCODE_RSQ: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flop(pc, 2, dst[c], src[0][0]); + } + break; + case TGSI_OPCODE_SCS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + if (mask & (1 << 0)) + emit_flop(pc, 5, dst[0], temp); + if (mask & (1 << 1)) + emit_flop(pc, 4, dst[1], temp); + break; + case TGSI_OPCODE_SGE: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 6, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_SIN: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + break; + case TGSI_OPCODE_SLT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sub(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + { + struct nv50_reg *t[4]; + struct nv50_program_exec *e; + + alloc_temp4(pc, t, 0); + emit_mov(pc, t[0], src[0][0]); + emit_mov(pc, t[1], src[0][1]); + + e = exec(pc); + e->inst[0] = 0xf6400000; + e->inst[0] |= (unit << 9); + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t[0], e); + emit(pc, e); + + if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); + + free_temp4(pc, t); + } + break; + case TGSI_OPCODE_XPD: + temp = alloc_temp(pc, NULL); + if (mask & (1 << 0)) { + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + } + if (mask & (1 << 1)) { + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + } + if (mask & (1 << 2)) { + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_END: + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); + return FALSE; + } + + if (sat) { + for (c = 0; c < 4; c++) { + struct nv50_program_exec *e; + + if (!(mask & (1 << c))) + continue; + e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], e); + set_src_0(pc, dst[c], e); + emit(pc, e); + } + } + + kill_temp_temp(pc); + return TRUE; +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context p; + boolean ret = FALSE; + unsigned i, c; + + tgsi_parse_init(&p, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm = + &p.FullToken.FullImmediate; + + ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *d; 
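+ /* Note (added): declarations are only scanned here to find the highest
+  * index used per file; the temp/attr/result/param register arrays are
+  * sized from these counts further down. */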
+ unsigned last; + + d = &p.FullToken.FullDeclaration; + last = d->DeclarationRange.Last; + + switch (d->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (pc->temp_nr < (last + 1)) + pc->temp_nr = last + 1; + break; + case TGSI_FILE_OUTPUT: + if (pc->result_nr < (last + 1)) + pc->result_nr = last + 1; + break; + case TGSI_FILE_INPUT: + if (pc->attr_nr < (last + 1)) + pc->attr_nr = last + 1; + break; + case TGSI_FILE_CONSTANT: + if (pc->param_nr < (last + 1)) + pc->param_nr = last + 1; + break; + case TGSI_FILE_SAMPLER: + break; + default: + NOUVEAU_ERR("bad decl file %d\n", + d->Declaration.File); + goto out_err; + } + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + break; + default: + break; + } + } + + if (pc->temp_nr) { + pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); + if (!pc->temp) + goto out_err; + + for (i = 0; i < pc->temp_nr; i++) { + for (c = 0; c < 4; c++) { + pc->temp[i*4+c].type = P_TEMP; + pc->temp[i*4+c].hw = -1; + pc->temp[i*4+c].index = i; + } + } + } + + if (pc->attr_nr) { + struct nv50_reg *iv = NULL; + int aid = 0; + + pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); + if (!pc->attr) + goto out_err; + + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + iv = alloc_temp(pc, NULL); + emit_interp(pc, iv, iv, NULL); + emit_flop(pc, 0, iv, iv); + aid++; + } + + for (i = 0; i < pc->attr_nr; i++) { + struct nv50_reg *a = &pc->attr[i*4]; + + for (c = 0; c < 4; c++) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + struct nv50_reg *at = + alloc_temp(pc, NULL); + pc->attr[i*4+c].type = at->type; + pc->attr[i*4+c].hw = at->hw; + pc->attr[i*4+c].index = at->index; + } else { + pc->p->cfg.vp.attr[aid/32] |= + (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } + } + + if (pc->p->type != PIPE_SHADER_FRAGMENT) + continue; + + emit_interp(pc, &a[0], &a[0], iv); + emit_interp(pc, &a[1], &a[1], iv); + emit_interp(pc, &a[2], &a[2], iv); + emit_interp(pc, &a[3], &a[3], iv); + } + + if (iv) + free_temp(pc, iv); + } + + if (pc->result_nr) { + int rid = 0; + + pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); + if (!pc->result) + goto out_err; + + for (i = 0; i < pc->result_nr; i++) { + for (c = 0; c < 4; c++) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + pc->result[i*4+c].type = P_TEMP; + pc->result[i*4+c].hw = -1; + } else { + pc->result[i*4+c].type = P_RESULT; + pc->result[i*4+c].hw = rid++; + } + pc->result[i*4+c].index = i; + } + } + } + + if (pc->param_nr) { + int rid = 0; + + pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); + if (!pc->param) + goto out_err; + + for (i = 0; i < pc->param_nr; i++) { + for (c = 0; c < 4; c++) { + pc->param[i*4+c].type = P_CONST; + pc->param[i*4+c].hw = rid++; + pc->param[i*4+c].index = i; + } + } + } + + if (pc->immd_nr) { + int rid = pc->param_nr * 4; + + pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); + if (!pc->immd) + goto out_err; + + for (i = 0; i < pc->immd_nr; i++) { + for (c = 0; c < 4; c++) { + pc->immd[i*4+c].type = P_IMMD; + pc->immd[i*4+c].hw = rid++; + pc->immd[i*4+c].index = i; + } + } + } + + ret = TRUE; +out_err: + tgsi_parse_free(&p); + return ret; +} + +static boolean +nv50_program_tx(struct nv50_program *p) +{ + struct tgsi_parse_context parse; + struct nv50_pc *pc; + boolean ret; + + pc = CALLOC_STRUCT(nv50_pc); + if (!pc) + return FALSE; + pc->p = p; + pc->p->cfg.high_temp = 4; + + ret = nv50_program_tx_prep(pc); + if (ret == FALSE) + goto out_cleanup; + + tgsi_parse_init(&parse, pc->p->pipe.tokens); + 
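+ /* Note (added): second pass over the token stream -- nv50_program_tx_prep()
+  * above set up the register files, this loop translates each instruction. */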
while (!tgsi_parse_end_of_tokens(&parse)) { + const union tgsi_full_token *tok = &parse.FullToken; + + tgsi_parse_token(&parse); + + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + ret = nv50_program_tx_insn(pc, tok); + if (ret == FALSE) + goto out_err; + break; + default: + break; + } + } + + if (p->type == PIPE_SHADER_FRAGMENT) { + struct nv50_reg out; + + out.type = P_TEMP; + for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) + emit_mov(pc, &out, &pc->result[out.hw]); + } + + assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); + pc->p->exec_tail->inst[1] |= 0x00000001; + + p->param_nr = pc->param_nr * 4; + p->immd_nr = pc->immd_nr * 4; + p->immd = pc->immd_buf; + +out_err: + tgsi_parse_free(&parse); + +out_cleanup: + return ret; +} + +static void +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) +{ + if (nv50_program_tx(p) == FALSE) + assert(0); + p->translated = TRUE; +} + +static void +nv50_program_upload_data(struct nv50_context *nv50, float *map, + unsigned start, unsigned count) +{ + while (count) { + unsigned nr = count > 2047 ? 2047 : count; + + BEGIN_RING(tesla, 0x00000f00, 1); + OUT_RING ((NV50_CB_PMISC << 0) | (start << 8)); + BEGIN_RING(tesla, 0x40000f04, nr); + OUT_RINGp (map, nr); + + map += nr; + start += nr; + count -= nr; + } +} + +static void +nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) +{ + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct pipe_winsys *ws = nv50->pipe.winsys; + unsigned nr = p->param_nr + p->immd_nr; + + if (!p->data && nr) { + struct nouveau_resource *heap = nv50->screen->vp_data_heap; + + if (nvws->res_alloc(heap, nr, p, &p->data)) { + while (heap->next && heap->size < nr) { + struct nv50_program *evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, nr, p, &p->data)) + assert(0); + } + } + + if (p->param_nr) { + float *map = ws->buffer_map(ws, nv50->constbuf[p->type], + PIPE_BUFFER_USAGE_CPU_READ); + nv50_program_upload_data(nv50, map, p->data->start, + p->param_nr); + ws->buffer_unmap(ws, nv50->constbuf[p->type]); + } + + if (p->immd_nr) { + nv50_program_upload_data(nv50, p->immd, + p->data->start + p->param_nr, + p->immd_nr); + } +} + +static void +nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nv50_program_exec *e; + struct nouveau_stateobj *so; + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + unsigned start, count, *up, *ptr; + boolean upload = FALSE; + + if (!p->buffer) { + p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + upload = TRUE; + } + + if (p->data && p->data->start != p->data_start) { + for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci; + + if (e->param.index < 0) + continue; + ei = e->param.shift >> 5; + ci = e->param.index + p->data->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); + } + + p->data_start = p->data->start; + upload = TRUE; + } + + if (!upload) + return; + +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("-------\n"); + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + } + +#endif + + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { + *(ptr++) = e->inst[0]; + if (is_long(e)) + *(ptr++) = e->inst[1]; + } + + so = so_new(4,2); + so_method(so, nv50->screen->tesla, 0x1280, 3); + 
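+ /* Note (added): bind the program buffer through the PUPLOAD constant-buffer
+  * window; the loop below then streams the code in chunks of at most
+  * 2047 dwords. */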
so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); + + start = 0; count = p->exec_size; + while (count) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + unsigned nr; + + so_emit(nvws, so); + + nr = MIN2(count, 2047); + nr = MIN2(nvws->channel->pushbuf->remaining, nr); + if (nvws->channel->pushbuf->remaining < (nr + 3)) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(tesla, 0x0f00, 1); + OUT_RING ((start << 8) | NV50_CB_PUPLOAD); + BEGIN_RING(tesla, 0x40000f04, nr); + OUT_RINGp (up + start, nr); + + start += nr; + count -= nr; + } + + FREE(up); + so_ref(NULL, &so); +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->vertprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(13, 2); + so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1650, 2); + so_data (so, p->cfg.vp.attr[0]); + so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16b8, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, 0x16ac, 2); + so_data (so, p->cfg.high_result); //8); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x140c, 1); + so_data (so, 0); /* program start offset */ + so_ref(so, &nv50->state.vertprog); +} + +void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->fragprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(64, 2); + so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x00040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_data (so, 0x0b0a0908); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x1414, 1); + so_data (so, 0); /* program start offset */ + so_ref(so, &nv50->state.fragprog); +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_screen *pscreen = nv50->pipe.screen; + + while (p->exec_head) { + struct nv50_program_exec *e = p->exec_head; + + p->exec_head = e->next; + FREE(e); + } + p->exec_tail = NULL; + p->exec_size = 0; + + if (p->buffer) + pipe_buffer_reference(pscreen, &p->buffer, NULL); + + nv50->screen->nvws->res_free(&p->data); + + p->translated = 0; +} + diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h new file mode 100644 index 0000000000..78deed6a38 --- /dev/null +++ 
b/src/gallium/drivers/nv50/nv50_program.h @@ -0,0 +1,45 @@ +#ifndef __NV50_PROGRAM_H__ +#define __NV50_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv50_program_exec { + struct nv50_program_exec *next; + + unsigned inst[2]; + struct { + int index; + unsigned mask; + unsigned shift; + } param; +}; + +struct nv50_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + boolean translated; + + unsigned type; + struct nv50_program_exec *exec_head; + struct nv50_program_exec *exec_tail; + unsigned exec_size; + struct nouveau_resource *data; + unsigned data_start; + + struct pipe_buffer *buffer; + + float *immd; + unsigned immd_nr; + unsigned param_nr; + + struct { + unsigned high_temp; + unsigned high_result; + struct { + unsigned attr[2]; + } vp; + } cfg; +}; + +#endif diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c new file mode 100644 index 0000000000..b923c820eb --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -0,0 +1,129 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +struct nv50_query { + struct pipe_buffer *buffer; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ + return (struct nv50_query *)pipe; +} + +static struct pipe_query * +nv50_query_create(struct pipe_context *pipe, unsigned type) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv50_query *q = CALLOC_STRUCT(nv50_query); + + assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); + q->type = type; + + q->buffer = ws->buffer_create(ws, 256, 0, 16); + if (!q->buffer) { + FREE(q); + return NULL; + } + + return (struct pipe_query *)q; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_query *q = nv50_query(pq); + + if (q) { + pipe_buffer_reference(pipe, &q->buffer, NULL); + FREE(q); + } +} + +static void +nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q = nv50_query(pq); + + BEGIN_RING(tesla, 0x1530, 1); + OUT_RING (1); + BEGIN_RING(tesla, 0x1514, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q = nv50_query(pq); + + BEGIN_RING(tesla, 0x1b00, 4); + OUT_RELOCh(q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (0x00000000); + OUT_RING (0x0100f002); + FIRE_RING (NULL); +} + +static boolean +nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv50_query *q = nv50_query(pq); + + /*XXX: Want to be able to return FALSE here instead of blocking + * until the result is available.. + */ + + if (!q->ready) { + uint32_t *map = ws->buffer_map(ws, q->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + q->result = map[1]; + q->ready = TRUE; + ws->buffer_unmap(ws, q->buffer); + } + + *result = q->result; + return q->ready; +} + +void +nv50_init_query_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_query = nv50_query_create; + nv50->pipe.destroy_query = nv50_query_destroy; + nv50->pipe.begin_query = nv50_query_begin; + nv50->pipe.end_query = nv50_query_end; + nv50->pipe.get_query_result = nv50_query_result; +} diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c new file mode 100644 index 0000000000..ef46233f83 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -0,0 +1,329 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_screen.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +#include "nouveau/nouveau_stateobj.h" + +#define NV5X_GRCLASS5097_CHIPSETS 0x00000001 +#define NV8X_GRCLASS8297_CHIPSETS 0x00000050 +#define NV9X_GRCLASS8297_CHIPSETS 0x00000014 + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static const char * +nv50_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv50_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 32; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv50_screen_destroy(struct pipe_screen *pscreen) +{ + FREE(pscreen); +} + +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); + struct 
nouveau_stateobj *so; + unsigned tesla_class = 0, ret; + unsigned chipset = nvws->channel->device->chipset; + int i; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 3D object */ + if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { + NOUVEAU_ERR("Not a G8x chipset\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + switch (chipset & 0xf0) { + case 0x50: + if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x5097; + break; + case 0x80: + if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + case 0x90: + if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + default: + break; + } + + if (tesla_class == 0) { + NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Sync notifier */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static tesla init */ + so = so_new(256, 20); + + so_method(so, screen->tesla, 0x1558, 1); + so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), + NV50TCL_DMA_UNK0__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), + NV50TCL_DMA_UNK1__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, 0x121c, 1); + so_data (so, 1); + + so_method(so, screen->tesla, 0x13bc, 1); + so_data (so, 0x54); + so_method(so, screen->tesla, 0x13ac, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x16b8, 1); + so_data (so, 8); + + /* Shared constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + + /* Texture sampler/image unit setup - we abuse the constant buffer + * upload mechanism for the moment to upload data to the tex config + * blocks. At some point we *may* want to go the NVIDIA way of doing + * things? 
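+ * Going by the 32 * 8 * 4 byte allocations below, the TIC and TSC buffers
+ * presumably hold 32 entries of 8 dwords each, uploaded through the same
+ * constant-buffer path.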
+ */ + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_TIC << 16) | 0x0800); + so_method(so, screen->tesla, 0x1574, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00000800); + + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_TSC << 16) | 0x0800); + so_method(so, screen->tesla, 0x155c, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00000800); + + + /* Vertex array limits - max them out */ + for (i = 0; i < 16; i++) { + so_method(so, screen->tesla, 0x1080 + (i * 8), 2); + so_data (so, 0x000000ff); + so_data (so, 0xffffffff); + } + + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->tesla, 0x1234, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x1458, 1); + so_data (so, 1); + + so_emit(nvws, so); + so_ref(so, &screen->static_init); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + + screen->pipe.destroy = nv50_screen_destroy; + + screen->pipe.get_name = nv50_screen_get_name; + screen->pipe.get_vendor = nv50_screen_get_vendor; + screen->pipe.get_param = nv50_screen_get_param; + screen->pipe.get_paramf = nv50_screen_get_paramf; + + screen->pipe.is_format_supported = nv50_screen_is_format_supported; + + nv50_screen_init_miptree_functions(&screen->pipe); + nv50_surface_init_screen_functions(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h new file mode 100644 index 0000000000..400ddcef06 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -0,0 +1,33 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv50_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + struct nouveau_grobj *tesla; + struct nouveau_notifier *sync; + + struct pipe_buffer *constbuf; + struct nouveau_resource *vp_data_heap; + + struct pipe_buffer *tic; + struct pipe_buffer *tsc; + + struct nouveau_stateobj *static_init; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +void nv50_surface_init_screen_functions(struct pipe_screen *); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c new file mode 100644 index 0000000000..38c1d938b8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -0,0 +1,664 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, 
copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static void * +nv50_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); + unsigned cmask = 0, i; + + /*XXX ignored: + * - dither + */ + + if (cso->blend_enable == 0) { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 0); + } else { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 1); + so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); + so_data (so, nvgl_blend_eqn(cso->rgb_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, nvgl_blend_eqn(cso->alpha_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor)); + so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor)); + } + + if (cso->logicop_enable == 0 ) { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } else { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } + + if (cso->colormask & PIPE_MASK_R) + cmask |= (1 << 0); + if (cso->colormask & PIPE_MASK_G) + cmask |= (1 << 4); + if (cso->colormask & PIPE_MASK_B) + cmask |= (1 << 8); + if (cso->colormask & PIPE_MASK_A) + cmask |= (1 << 12); + so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); + for (i = 0; i < 8; i++) + so_data(so, cmask); + + bso->pipe = *cso; + so_ref(so, &bso->so); + return (void *)bso; +} + +static void +nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend = hwcso; + nv50->dirty |= NV50_NEW_BLEND; +} + +static void +nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_blend_stateobj *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static INLINE unsigned +wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return NV50TSC_1_0_WRAPS_REPEAT; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return NV50TSC_1_0_WRAPS_MIRROR_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_CLAMP: + return NV50TSC_1_0_WRAPS_CLAMP; + case 
PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50TSC_1_0_WRAPS_REPEAT; + } +} +static void * +nv50_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + unsigned *tsc = CALLOC(8, sizeof(unsigned)); + + tsc[0] = (0x00024000 | + (wrap_mode(cso->wrap_s) << 0) | + (wrap_mode(cso->wrap_t) << 3) | + (wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + tsc[1] |= NV50TSC_1_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + tsc[1] |= NV50TSC_1_1_MIPF_NONE; + break; + } + + if (cso->max_anisotropy >= 16.0) + tsc[0] |= (7 << 20); + else + if (cso->max_anisotropy >= 12.0) + tsc[0] |= (6 << 20); + else + if (cso->max_anisotropy >= 10.0) + tsc[0] |= (5 << 20); + else + if (cso->max_anisotropy >= 8.0) + tsc[0] |= (4 << 20); + else + if (cso->max_anisotropy >= 6.0) + tsc[0] |= (3 << 20); + else + if (cso->max_anisotropy >= 4.0) + tsc[0] |= (2 << 20); + else + if (cso->max_anisotropy >= 2.0) + tsc[0] |= (1 << 20); + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + tsc[0] |= (1 << 8); + tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7); + } + + return (void *)tsc; +} + +static void +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + nv50->sampler_nr = nr; + for (i = 0; i < nv50->sampler_nr; i++) + nv50->sampler[i] = sampler[i]; + + nv50->dirty |= NV50_NEW_SAMPLER; +} + +static void +nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **pt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + for (i = 0; i < nr; i++) + pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); + for (i = nr; i < nv50->miptree_nr; i++) + pipe_texture_reference((void *)&nv50->miptree[i], NULL); + + nv50->miptree_nr = nr; + nv50->dirty |= NV50_NEW_TEXTURE; +} + +static void * +nv50_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_rasterizer_stateobj *rso = + CALLOC_STRUCT(nv50_rasterizer_stateobj); + + /*XXX: ignored + * - light_twosize + * - point_smooth + * - multisample + * - point_sprite / sprite_coord_mode + */ + + so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : + NV50TCL_SHADE_MODEL_SMOOTH); + + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); + so_data (so, fui(cso->line_width)); + so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); + so_data (so, cso->line_smooth ? 
1 : 0); + if (cso->line_stipple_enable) { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); + so_data (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + } else { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, tesla, NV50TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + } + so_data(so, cso->poly_smooth ? 1 : 0); + + so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3); + so_data (so, cso->cull_mode != PIPE_WINDING_NONE); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, NV50TCL_FRONT_FACE_CCW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } else { + so_data(so, NV50TCL_FRONT_FACE_CW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } + + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); + so_data (so, fui(cso->offset_scale)); + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); + so_data (so, fui(cso->offset_units)); + } + + rso->pipe = *cso; + so_ref(so, &rso->so); + return (void *)rso; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->rasterizer = hwcso; + nv50->dirty |= NV50_NEW_RASTERIZER; +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_rasterizer_stateobj *rso = hwcso; + + so_ref(NULL, &rso->so); + FREE(rso); +} + +static void * +nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); + struct nouveau_stateobj *so = so_new(64, 0); + + so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); + so_data (so, cso->depth.writemask ? 
1 : 0); + if (cso->depth.enabled) { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); + so_data (so, nvgl_comparison_op(cso->depth.func)); + } else { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 0); + } + + /*XXX: yes, I know they're backwards.. header needs fixing */ + if (cso->stencil[0].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].write_mask); + so_data (so, cso->stencil[0].value_mask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].write_mask); + so_data (so, cso->stencil[1].value_mask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->alpha.enabled) { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2); + so_data (so, fui(cso->alpha.ref)); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + } else { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 0); + } + + zsa->pipe = *cso; + so_ref(so, &zsa->so); + return (void *)zsa; +} + +static void +nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->zsa = hwcso; + nv50->dirty |= NV50_NEW_ZSA; +} + +static void +nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_zsa_stateobj *zsa = hwcso; + + so_ref(NULL, &zsa->so); + FREE(zsa); +} + +static void * +nv50_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_VERTEX; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; +} + +static void +nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_FRAGMENT; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + 
nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; +} + +static void +nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); +} + +static void +nv50_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->dirty |= NV50_NEW_VERTPROG_CB; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } +} + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->framebuffer = *fb; + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; +} + +static void +nv50_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->scissor = *s; + nv50->dirty |= NV50_NEW_SCISSOR; +} + +static void +nv50_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; +} + +static void +nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); + nv50->vtxbuf_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +static void +nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxelt, ve, sizeof(*ve) * count); + nv50->vtxelt_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +void +nv50_init_state_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_blend_state = nv50_blend_state_create; + nv50->pipe.bind_blend_state = nv50_blend_state_bind; + nv50->pipe.delete_blend_state = nv50_blend_state_delete; + + nv50->pipe.create_sampler_state = nv50_sampler_state_create; + nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; + nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; + nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + + nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; + nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; + nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; + + nv50->pipe.create_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_create; + nv50->pipe.bind_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_bind; + nv50->pipe.delete_depth_stencil_alpha_state = + 
nv50_depth_stencil_alpha_state_delete; + + nv50->pipe.create_vs_state = nv50_vp_state_create; + nv50->pipe.bind_vs_state = nv50_vp_state_bind; + nv50->pipe.delete_vs_state = nv50_vp_state_delete; + + nv50->pipe.create_fs_state = nv50_fp_state_create; + nv50->pipe.bind_fs_state = nv50_fp_state_bind; + nv50->pipe.delete_fs_state = nv50_fp_state_delete; + + nv50->pipe.set_blend_color = nv50_set_blend_color; + nv50->pipe.set_clip_state = nv50_set_clip_state; + nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; + nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; + nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; + nv50->pipe.set_scissor_state = nv50_set_scissor_state; + nv50->pipe.set_viewport_state = nv50_set_viewport_state; + + nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_vertex_elements = nv50_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c new file mode 100644 index 0000000000..198e25f448 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -0,0 +1,309 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nv50_context.h" +#include "nouveau/nouveau_stateobj.h" + +static void +nv50_state_validate_fb(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(128, 18); + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i, w, h, gw = 0; + + for (i = 0; i < fb->num_cbufs; i++) { + if (!gw) { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + gw = 1; + } else { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + } + + so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); + so_data (so, fb->cbufs[i]->width); + so_data (so, fb->cbufs[i]->height); + + so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); + so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + switch (fb->cbufs[i]->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, 0xcf); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, 0xe8); + break; + default: + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->cbufs[i]->format)); + so_data(so, 0xe6); + break; + } + so_data(so, 0x00000000); + so_data(so, 0x00000000); + + so_method(so, tesla, 0x1224, 1); + so_data (so, 1); + } + + if (fb->zsbuf) { + if (!gw) { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + gw = 1; + } else { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } + + so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); + so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z24S8_UNORM: + so_data(so, 0x16); + break; + case PIPE_FORMAT_Z16_UNORM: + so_data(so, 0x15); + break; + default: + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->zsbuf->format)); + so_data(so, 0x16); + break; + } + so_data(so, 0x00000000); + so_data(so, 0x00000000); + + so_method(so, tesla, 0x1538, 1); + so_data (so, 1); + so_method(so, tesla, 0x1228, 3); + so_data (so, fb->zsbuf->width); + so_data (so, fb->zsbuf->height); + so_data (so, 0x00010001); + } + + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0x0e04, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0xdf8, 2); + so_data (so, 0); + so_data (so, h); + + so_ref(so, &nv50->state.fb); +} + +static void +nv50_state_emit(struct nv50_context *nv50) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_winsys *nvws = screen->nvws; + + if (nv50->pctx_id != screen->cur_pctx) { + nv50->state.dirty |= 0xffffffff; + screen->cur_pctx = nv50->pctx_id; + } + + if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) + so_emit(nvws, nv50->state.fb); + if (nv50->state.dirty & NV50_NEW_BLEND) + so_emit(nvws, nv50->state.blend); + if (nv50->state.dirty & NV50_NEW_ZSA) + so_emit(nvws, nv50->state.zsa); + if (nv50->state.dirty & NV50_NEW_VERTPROG) + so_emit(nvws, nv50->state.vertprog); + if (nv50->state.dirty & NV50_NEW_FRAGPROG) + so_emit(nvws, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_RASTERIZER) + so_emit(nvws, nv50->state.rast); + if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) + so_emit(nvws, nv50->state.blend_colour); + if (nv50->state.dirty & NV50_NEW_STIPPLE) + so_emit(nvws, nv50->state.stipple); + if (nv50->state.dirty & NV50_NEW_SCISSOR) + 
so_emit(nvws, nv50->state.scissor); + if (nv50->state.dirty & NV50_NEW_VIEWPORT) + so_emit(nvws, nv50->state.viewport); + if (nv50->state.dirty & NV50_NEW_SAMPLER) + so_emit(nvws, nv50->state.tsc_upload); + if (nv50->state.dirty & NV50_NEW_TEXTURE) + so_emit(nvws, nv50->state.tic_upload); + if (nv50->state.dirty & NV50_NEW_ARRAYS) { + so_emit(nvws, nv50->state.vtxfmt); + so_emit(nvws, nv50->state.vtxbuf); + } + nv50->state.dirty = 0; + + so_emit_reloc_markers(nvws, nv50->state.fb); + so_emit_reloc_markers(nvws, nv50->state.vertprog); + so_emit_reloc_markers(nvws, nv50->state.fragprog); + so_emit_reloc_markers(nvws, nv50->state.vtxbuf); + so_emit_reloc_markers(nvws, nv50->screen->static_init); +} + +boolean +nv50_state_validate(struct nv50_context *nv50) +{ + const struct pipe_framebuffer_state *fb = &nv50->framebuffer; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + unsigned i; + + for (i = 0; i < fb->num_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + if (nv50->dirty & NV50_NEW_FRAMEBUFFER) + nv50_state_validate_fb(nv50); + + if (nv50->dirty & NV50_NEW_BLEND) + so_ref(nv50->blend->so, &nv50->state.blend); + + if (nv50->dirty & NV50_NEW_ZSA) + so_ref(nv50->zsa->so, &nv50->state.zsa); + + if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) + nv50_vertprog_validate(nv50); + + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) + nv50_fragprog_validate(nv50); + + if (nv50->dirty & NV50_NEW_RASTERIZER) + so_ref(nv50->rasterizer->so, &nv50->state.rast); + + if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { + so = so_new(5, 0); + so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); + so_data (so, fui(nv50->blend_colour.color[0])); + so_data (so, fui(nv50->blend_colour.color[1])); + so_data (so, fui(nv50->blend_colour.color[2])); + so_data (so, fui(nv50->blend_colour.color[3])); + so_ref(so, &nv50->state.blend_colour); + } + + if (nv50->dirty & NV50_NEW_STIPPLE) { + so = so_new(33, 0); + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv50->stipple.stipple[i]); + so_ref(so, &nv50->state.stipple); + } + + if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) { + struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe; + struct pipe_scissor_state *s = &nv50->scissor; + + if (nv50->state.scissor && + (rast->scissor == 0 && nv50->state.scissor_enabled == 0)) + goto scissor_uptodate; + nv50->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, tesla, 0x0ff4, 2); + if (nv50->state.scissor_enabled) { + so_data(so, ((s->maxx - s->minx) << 16) | s->minx); + so_data(so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data(so, (8192 << 16)); + so_data(so, (8192 << 16)); + } + so_ref(so, &nv50->state.scissor); + nv50->state.dirty |= NV50_NEW_SCISSOR; + } +scissor_uptodate: + + if (nv50->dirty & NV50_NEW_VIEWPORT) { + unsigned bypass; + + if (!nv50->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv50->state.viewport && + (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && + nv50->state.viewport_bypass == bypass) + goto viewport_uptodate; + nv50->state.viewport_bypass = bypass; + + so = so_new(12, 0); + if (!bypass) { + so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, 
NV50TCL_VIEWPORT_UNK0(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(-nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + so_method(so, tesla, 0x192c, 1); + so_data (so, 1); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 0); + } else { + so_method(so, tesla, 0x192c, 1); + so_data (so, 0); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 1); + } + + so_ref(so, &nv50->state.viewport); + } +viewport_uptodate: + + if (nv50->dirty & NV50_NEW_SAMPLER) { + int i; + + so = so_new(nv50->sampler_nr * 8 + 3, 0); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TSC); + so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) + so_datap (so, nv50->sampler[i], 8); + so_ref(so, &nv50->state.tsc_upload); + } + + if (nv50->dirty & NV50_NEW_TEXTURE) + nv50_tex_validate(nv50); + + if (nv50->dirty & NV50_NEW_ARRAYS) + nv50_vbo_validate(nv50); + + nv50->state.dirty |= nv50->dirty; + nv50->dirty = 0; + nv50_state_emit(nv50); + + return TRUE; +} + diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c new file mode 100644 index 0000000000..3f45a2fe18 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -0,0 +1,92 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nv50_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "util/u_tile.h" + +static void +nv50_surface_copy(struct pipe_context *pipe, boolean flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nouveau_winsys *nvws = nv50->screen->nvws; + + if (flip) { + desty += height; + while (height--) { + nvws->surface_copy(nvws, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nvws->surface_copy(nvws, dest, destx, desty, src, srcx, srcy, + width, height); + } +} + +static void +nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nouveau_winsys *nvws = nv50->screen->nvws; + + nvws->surface_fill(nvws, dest, destx, desty, width, height, value); +} + +static void * +nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + + return ws->buffer_map(ws, ps->buffer, flags); +} + +static void +nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) +{ + struct pipe_winsys *ws = pscreen->winsys; + + ws->buffer_unmap(ws, ps->buffer); +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ + nv50->pipe.surface_copy = nv50_surface_copy; + nv50->pipe.surface_fill = nv50_surface_fill; +} + +void +nv50_surface_init_screen_functions(struct pipe_screen *pscreen) +{ + pscreen->surface_map = nv50_surface_map; + pscreen->surface_unmap = nv50_surface_unmap; +} + diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c new file mode 100644 index 0000000000..239407c92b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -0,0 +1,161 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static int +nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) +{ + switch (mt->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8_8_8); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_1_5_5_5); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_4_4_4_4); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_5_6_5); + break; + case PIPE_FORMAT_L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_I8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8); + break; + case PIPE_FORMAT_DXT1_RGB: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT1); + break; + case PIPE_FORMAT_DXT1_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT1); + break; + case PIPE_FORMAT_DXT3_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT3); + break; + case PIPE_FORMAT_DXT5_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT5); + 
break; + default: + return 1; + } + + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0xd0005000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); + + return 0; +} + +void +nv50_tex_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + int unit, level, image; + + so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TIC); + so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); + for (unit = 0; unit < nv50->miptree_nr; unit++) { + struct nv50_miptree *mt = nv50->miptree[unit]; + + for (level = 0; level <= mt->base.last_level; level++) { + for (image = 0; image < mt->image_nr; image++) { + nv50_miptree_sync(&nv50->screen->pipe, mt, + level, image); + } + } + + if (nv50_tex_construct(so, mt)) { + NOUVEAU_ERR("failed tex validate\n"); + so_ref(NULL, &so); + return; + } + } + + so_ref(so, &nv50->state.tic_upload); +} + diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h new file mode 100644 index 0000000000..aca622c73b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -0,0 +1,129 @@ +#ifndef __NV50_TEXTURE_H__ +#define __NV50_TEXTURE_H__ + +/* It'd be really nice to have these in nouveau_class.h generated by + * renouveau like the rest of the object header - but not sure it can + * handle non-object stuff nicely - need to look into it. + */ + +/* Texture image control block */ +#define NV50TIC_0_0_MAPA_MASK 0x38000000 +#define NV50TIC_0_0_MAPA_ZERO 0x00000000 +#define NV50TIC_0_0_MAPA_C0 0x10000000 +#define NV50TIC_0_0_MAPA_C1 0x18000000 +#define NV50TIC_0_0_MAPA_C2 0x20000000 +#define NV50TIC_0_0_MAPA_C3 0x28000000 +#define NV50TIC_0_0_MAPA_ONE 0x38000000 +#define NV50TIC_0_0_MAPR_MASK 0x07000000 +#define NV50TIC_0_0_MAPR_ZERO 0x00000000 +#define NV50TIC_0_0_MAPR_C0 0x02000000 +#define NV50TIC_0_0_MAPR_C1 0x03000000 +#define NV50TIC_0_0_MAPR_C2 0x04000000 +#define NV50TIC_0_0_MAPR_C3 0x05000000 +#define NV50TIC_0_0_MAPR_ONE 0x07000000 +#define NV50TIC_0_0_MAPG_MASK 0x00e00000 +#define NV50TIC_0_0_MAPG_ZERO 0x00000000 +#define NV50TIC_0_0_MAPG_C0 0x00400000 +#define NV50TIC_0_0_MAPG_C1 0x00600000 +#define NV50TIC_0_0_MAPG_C2 0x00800000 +#define NV50TIC_0_0_MAPG_C3 0x00a00000 +#define NV50TIC_0_0_MAPG_ONE 0x00e00000 +#define NV50TIC_0_0_MAPB_MASK 0x001c0000 +#define NV50TIC_0_0_MAPB_ZERO 0x00000000 +#define NV50TIC_0_0_MAPB_C0 0x00080000 +#define NV50TIC_0_0_MAPB_C1 0x000c0000 +#define NV50TIC_0_0_MAPB_C2 0x00100000 +#define NV50TIC_0_0_MAPB_C3 0x00140000 +#define NV50TIC_0_0_MAPB_ONE 0x001c0000 +#define NV50TIC_0_0_TYPEA_MASK 0x00038000 +#define NV50TIC_0_0_TYPEA_UNORM 0x00010000 +#define NV50TIC_0_0_TYPER_MASK 0x00007000 +#define NV50TIC_0_0_TYPER_UNORM 0x00002000 +#define NV50TIC_0_0_TYPEG_MASK 0x00000e00 +#define NV50TIC_0_0_TYPEG_UNORM 0x00000400 +#define NV50TIC_0_0_TYPEB_MASK 0x000001c0 +#define NV50TIC_0_0_TYPEB_UNORM 0x00000080 +#define NV50TIC_0_0_FMT_MASK 0x0000003c +#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 +#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 +#define NV50TIC_0_0_FMT_5_6_5 0x00000015 +#define NV50TIC_0_0_FMT_8_8 0x00000018 +#define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_DXT1 0x00000024 +#define NV50TIC_0_0_FMT_DXT3 
0x00000025 +#define NV50TIC_0_0_FMT_DXT5 0x00000026 + +#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff +#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 + +#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff +#define NV50TIC_0_4_WIDTH_SHIFT 0 + +#define NV50TIC_0_5_DEPTH_MASK 0xffff0000 +#define NV50TIC_0_5_DEPTH_SHIFT 16 +#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff +#define NV50TIC_0_5_HEIGHT_SHIFT 0 + +#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff +#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0 + +/* Texture sampler control block */ +#define NV50TSC_1_0_WRAPS_MASK 0x00000007 +#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003 +#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007 +#define NV50TSC_1_0_WRAPT_MASK 0x00000038 +#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018 +#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038 +#define NV50TSC_1_0_WRAPR_MASK 0x000001c0 +#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0 +#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0 + +#define NV50TSC_1_1_MAGF_MASK 0x00000003 +#define NV50TSC_1_1_MAGF_NEAREST 0x00000001 +#define NV50TSC_1_1_MAGF_LINEAR 0x00000002 +#define NV50TSC_1_1_MINF_MASK 0x00000030 +#define NV50TSC_1_1_MINF_NEAREST 0x00000010 +#define NV50TSC_1_1_MINF_LINEAR 0x00000020 +#define NV50TSC_1_1_MIPF_MASK 0x000000c0 +#define NV50TSC_1_1_MIPF_NONE 0x00000040 +#define NV50TSC_1_1_MIPF_NEAREST 0x00000080 +#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 + +#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff + +#endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c new file mode 100644 index 0000000000..435dc9777d --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -0,0 +1,241 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included 
in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +static INLINE unsigned +nv50_prim(unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; + case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; + case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; + case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; + case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + default: + break; + } + + NOUVEAU_ERR("invalid primitive type %d\n", mode); + return NV50TCL_VERTEX_BEGIN_POINTS; +} + +boolean +nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50_state_validate(nv50); + + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x1440, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x1334, 1); + OUT_RING (0); + + BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (nv50_prim(mode)); + BEGIN_RING(tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (start); + OUT_RING (count); + BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (0); + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + BEGIN_RING(tesla, 0x15e8, 1); + OUT_RING (map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING ((map[i + 1] << 16) | map[i]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, + unsigned start, unsigned count) +{ + map += start; + + if (count & 1) { + BEGIN_RING(tesla, 0x15e8, 1); + OUT_RING (map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING ((map[i + 1] << 16) | map[i]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, + unsigned start, unsigned count) +{ + map += start; + + while (count) { + unsigned nr = count > 2047 ? 
2047 : count; + + BEGIN_RING(tesla, 0x400015e8, nr); + OUT_RINGp (map, nr); + + count -= nr; + map += nr; + } +} + +boolean +nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + + nv50_state_validate(nv50); + + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + BEGIN_RING(tesla, 0x142c, 1); + OUT_RING (0); + + BEGIN_RING(tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (nv50_prim(mode)); + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + default: + assert(0); + } + BEGIN_RING(tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (0); + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +void +nv50_vbo_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + int i, vpi = 0; + + vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2); + vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); + so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr); + + for (i = 0; i < nv50->vtxelt_nr; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt[i]; + struct pipe_vertex_buffer *vb = + &nv50->vtxbuf[ve->vertex_buffer_index]; + + switch (ve->src_format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + so_data(vtxfmt, 0x7e080000 | i); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + so_data(vtxfmt, 0x7e100000 | i); + break; + case PIPE_FORMAT_R32G32_FLOAT: + so_data(vtxfmt, 0x7e200000 | i); + break; + case PIPE_FORMAT_R32_FLOAT: + so_data(vtxfmt, 0x7e900000 | i); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + so_data(vtxfmt, 0x24500000 | i); + break; + default: + { + NOUVEAU_ERR("invalid vbo format %s\n", + pf_name(ve->src_format)); + assert(0); + return; + } + } + + so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); + so_data (vtxbuf, 0x20000000 | vb->pitch); + so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + } + + so_ref (vtxfmt, &nv50->state.vtxfmt); + so_ref (vtxbuf, &nv50->state.vtxbuf); +} + diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile new file mode 100644 index 0000000000..120bdfd9dd --- /dev/null +++ b/src/gallium/drivers/softpipe/Makefile @@ -0,0 +1,47 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = softpipe + +C_SOURCES = \ + sp_fs_exec.c \ + sp_fs_sse.c \ + sp_fs_llvm.c \ + sp_clear.c \ + sp_flush.c \ + sp_query.c \ + sp_context.c \ + sp_draw_arrays.c \ + sp_prim_setup.c \ + sp_prim_vbuf.c \ + sp_quad.c \ + sp_quad_alpha_test.c \ + sp_quad_blend.c \ + sp_quad_colormask.c \ + sp_quad_coverage.c \ + sp_quad_depth_test.c \ + sp_quad_earlyz.c \ + sp_quad_fs.c \ + sp_quad_occlusion.c \ + sp_quad_output.c \ + sp_quad_stencil.c \ + sp_quad_stipple.c \ + sp_screen.c \ + sp_setup.c \ + sp_state_blend.c \ + sp_state_clip.c \ + sp_state_derived.c \ + sp_state_fs.c \ + sp_state_sampler.c \ + sp_state_rasterizer.c \ + sp_state_surface.c \ + sp_state_vertex.c \ + sp_texture.c \ + sp_tex_sample.c \ + sp_tile_cache.c \ + sp_surface.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript new file mode 100644 index 0000000000..c1f7daa8ab --- /dev/null +++ b/src/gallium/drivers/softpipe/SConscript @@ -0,0 +1,46 @@ +Import('*') + +env = env.Clone() + +softpipe = env.ConvenienceLibrary( + target = 'softpipe', + source = [ + 'sp_fs_exec.c', + 'sp_fs_sse.c', + 'sp_fs_llvm.c', + 'sp_clear.c', + 'sp_context.c', + 'sp_draw_arrays.c', + 'sp_flush.c', + 'sp_prim_setup.c', + 'sp_prim_vbuf.c', + 'sp_setup.c', + 'sp_quad_alpha_test.c', + 'sp_quad_blend.c', + 'sp_quad.c', + 'sp_quad_colormask.c', + 'sp_quad_coverage.c', + 'sp_quad_depth_test.c', + 'sp_quad_earlyz.c', + 'sp_quad_fs.c', + 'sp_quad_occlusion.c', + 'sp_quad_output.c', + 'sp_quad_stencil.c', + 'sp_quad_stipple.c', + 'sp_query.c', + 'sp_screen.c', + 'sp_state_blend.c', + 'sp_state_clip.c', + 'sp_state_derived.c', + 'sp_state_fs.c', + 'sp_state_rasterizer.c', + 'sp_state_sampler.c', + 'sp_state_surface.c', + 'sp_state_vertex.c', + 'sp_surface.c', + 'sp_tex_sample.c', + 'sp_texture.c', + 'sp_tile_cache.c', + ]) + +Export('softpipe')
\ No newline at end of file diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c new file mode 100644 index 0000000000..dfa46c9fb7 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" + + +/** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + * Note: when clearing a color buffer, the clearValue is always + * encoded as PIPE_FORMAT_A8R8G8B8_UNORM. + */ +void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + if (softpipe->no_rast) + return; + +#if 0 + softpipe_update_derived(softpipe); /* not needed?? 
*/ +#endif + + if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { + sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); + softpipe->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_CLEAR; +#if TILE_CLEAR_OPTIMIZATION + return; +#endif + } + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { + unsigned cv; + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } + else { + cv = clearValue; + } + sp_tile_cache_clear(softpipe->cbuf_cache[i], cv); + softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR; + } + } + +#if !TILE_CLEAR_OPTIMIZATION + /* non-cached surface */ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +#endif +} diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h new file mode 100644 index 0000000000..a8ed1c4ecc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + +#ifndef SP_CLEAR_H +#define SP_CLEAR_H + +#include "pipe/p_state.h" +struct pipe_context; + +extern void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +#endif /* SP_CLEAR_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c new file mode 100644 index 0000000000..800f944838 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -0,0 +1,288 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + if (softpipe->draw) + draw_destroy( softpipe->draw ); + + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); + softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); + softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); + softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); + softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); + softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); + softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); + softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); + softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); + softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); + softpipe->quad[i].output->destroy( softpipe->quad[i].output ); + } + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) 
+ sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + void *unused ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + + util_init_math(); + +#ifdef PIPE_ARCH_X86 + softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; + softpipe->pipe.destroy = softpipe_destroy; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; + + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( softpipe ); + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! 
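+    * (The sampler setup further below stores pointers into these caches,
+    * e.g. tgsi.frag_samplers[i].cache = softpipe->tex_cache[i], so the
+    * caches have to exist before the quad stages and samplers are built.)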
+ */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); + softpipe->zsbuf_cache = sp_create_tile_cache( screen ); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache( screen ); + + + /* setup quad rendering stages */ + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); + softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); + softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad[i].output = sp_quad_output_stage(softpipe); + } + + /* vertex shader samplers */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex; + softpipe->tgsi.vert_samplers[i].unit = i; + softpipe->tgsi.vert_samplers[i].sp = softpipe; + softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; + softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; + } + + /* fragment shader samplers */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; + softpipe->tgsi.frag_samplers[i].unit = i; + softpipe->tgsi.frag_samplers[i].sp = softpipe; + softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; + softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; + } + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + if (!softpipe->draw) + goto fail; + + draw_texture_samplers(softpipe->draw, + PIPE_MAX_SAMPLERS, + (struct tgsi_sampler **) + softpipe->tgsi.vert_samplers_list); + + softpipe->setup = sp_draw_render_stage(softpipe); + if (!softpipe->setup) + goto fail; + + if (debug_get_bool_option( "SP_NO_RAST", FALSE )) + softpipe->no_rast = TRUE; + + if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { + /* Deprecated path -- vbuf is the intended interface to the draw module: + */ + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + else { + sp_init_vbuf(softpipe); + } + + /* plug in AA line/point stages */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; + + fail: + softpipe_destroy(&softpipe->pipe); + return NULL; +} + diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h new file mode 100644 index 0000000000..e2451c6ecb --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -0,0 +1,174 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_CONTEXT_H +#define SP_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "draw/draw_vertex.h" + +#include "sp_quad.h" +#include "sp_tex_sample.h" + + +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + +/* Number of threads working on individual quads. + * Setting to 1 disables this feature. + */ +#define SP_NUM_QUAD_THREADS 1 + +struct softpipe_winsys; +struct softpipe_vbuf_render; +struct draw_context; +struct draw_stage; +struct softpipe_tile_cache; +struct sp_fragment_shader; +struct sp_vertex_shader; + + +struct softpipe_context { + struct pipe_context pipe; /**< base class */ + + /* The most recent drawing state as set by the driver: + */ + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct sp_fragment_shader *fs; + const struct sp_vertex_shader *vs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + boolean no_rast; + + /* Counter for occlusion queries. Note this supports overlapping + * queries. 
+ */ + uint64_t occlusion_count; + + /* + * Mapped vertex buffers + */ + ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + /** Vertex format */ + struct vertex_info vertex_info; + struct vertex_info vertex_info_vbuf; + + int psize_slot; + + unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + +#if 0 + /* Stipple derived state: + */ + ubyte stipple_masks[16][16]; +#endif + + /** Derived from scissor and surface bounds: */ + struct pipe_scissor_state cliprect; + + unsigned line_stipple_counter; + + /** Software quad rendering pipeline */ + struct { + struct quad_stage *polygon_stipple; + struct quad_stage *earlyz; + struct quad_stage *shade; + struct quad_stage *alpha_test; + struct quad_stage *stencil_test; + struct quad_stage *depth_test; + struct quad_stage *occlusion; + struct quad_stage *coverage; + struct quad_stage *blend; + struct quad_stage *colormask; + struct quad_stage *output; + + struct quad_stage *first; /**< points to one of the above stages */ + } quad[SP_NUM_QUAD_THREADS]; + + /** TGSI exec things */ + struct { + struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; + } tgsi; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *setup; + struct draw_stage *vbuf; + struct softpipe_vbuf_render *vbuf_render; + + struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; + struct softpipe_tile_cache *zsbuf_cache; + + struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + + int use_sse : 1; + int dump_fs : 1; +}; + + +static INLINE struct softpipe_context * +softpipe_context( struct pipe_context *pipe ) +{ + return (struct softpipe_context *)pipe; +} + +#endif /* SP_CONTEXT_H */ + diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c new file mode 100644 index 0000000000..424bd56846 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" + +#include "draw/draw_context.h" + + + +static void +softpipe_map_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < PIPE_SHADER_TYPES; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); +} + +static void +softpipe_unmap_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + + /* really need to flush all prims since the vert/frag shaders const buffers + * are going away now. + */ + draw_flush(sp->draw); + + draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +boolean +softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ + +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + sp->reduced_api_prim = reduced_prim[mode]; + + if (sp->dirty) + softpipe_update_derived( sp ); + + softpipe_map_surfaces(sp); + softpipe_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf + = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); + } + + + /* draw! 
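+    * At this point all derived state is up to date, the drawing surfaces,
+    * constant buffers, vertex buffers and (optional) index buffer are
+    * mapped, and the draw module has pointers to them.  draw_arrays()
+    * runs the transform/clip/setup pipeline; unmapping the buffers
+    * afterwards forces draw to flush any queued primitives.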
*/ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe_buffer_unmap(pipe->screen, indexBuffer); + } + + + /* Note: leave drawing surfaces mapped */ + softpipe_unmap_constant_buffers(sp); + + return TRUE; +} + +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct softpipe_context *sp = softpipe_context(pipe); + draw_set_edgeflags(sp->draw, edgeflags); +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c new file mode 100644 index 0000000000..401764bb43 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "sp_flush.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" +#include "sp_winsys.h" + + +void +softpipe_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + draw_flush(softpipe->draw); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + for (i = 0; i < softpipe->num_textures; i++) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); + } + } + + if (flags & PIPE_FLUSH_RENDER_CACHE) { + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_surfaces(softpipe); + } + + /* Enable to dump BMPs of the color/depth buffers each frame */ +#if 0 + if(flags & PIPE_FLUSH_FRAME) { + static unsigned frame_no = 1; + static char filename[256]; + util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); + util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); + ++frame_no; + } +#endif + + if (fence) + *fence = NULL; +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h new file mode 100644 index 0000000000..68d9b5fa83 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef SP_FLUSH_H +#define SP_FLUSH_H + +struct pipe_context; +struct pipe_fence_handle; + +void softpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h new file mode 100644 index 0000000000..4792ace3a3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_FS_H +#define SP_FS_H + +struct sp_fragment_shader * +softpipe_create_fs_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct tgsi_interp_coef; +struct tgsi_exec_vector; + +void sp_setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c new file mode 100644 index 0000000000..453b0373f0 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" +#include "sp_headers.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" + +struct sp_exec_fragment_shader +{ + struct sp_fragment_shader base; +}; + + +/** cast wrapper */ +static INLINE struct sp_exec_fragment_shader * +sp_exec_fragment_shader(const struct sp_fragment_shader *base) +{ + return (struct sp_exec_fragment_shader *) base; +} + + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. + */ +void +sp_setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + quadpos->xyzw[0].f[1] = x + 1; + quadpos->xyzw[0].f[2] = x; + quadpos->xyzw[0].f[3] = x + 1; + + /* do Y */ + quadpos->xyzw[1].f[0] = y; + quadpos->xyzw[1].f[1] = y; + quadpos->xyzw[1].f[2] = y + 1; + quadpos->xyzw[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + +static void +exec_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers ) +{ + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. 
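+    * (Binding presumably re-parses the token stream to set up the
+    * interpreter, so it is worth skipping when the machine already
+    * holds this shader's tokens.)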
+ */ + if (machine->Tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); + } +} + + + + +/* TODO: hide the machine struct in here somewhere, remove from this + * interface: + */ +static unsigned +exec_run( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct quad_header *quad ) +{ + + /* Compute X, Y, Z, W vals for this quad */ + sp_setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + &machine->QuadPos); + + return tgsi_exec_machine_run( machine ); +} + + + +static void +exec_delete( struct sp_fragment_shader *base ) +{ + FREE((void *) base->shader.tokens); + FREE(base); +} + + + + + +struct sp_fragment_shader * +softpipe_create_fs_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + struct sp_exec_fragment_shader *shader; + + /* Decide whether we'll be codegenerating this shader and if so do + * that now. + */ + + shader = CALLOC_STRUCT(sp_exec_fragment_shader); + if (!shader) + return NULL; + + /* we need to keep a local copy of the tokens */ + shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); + shader->base.prepare = exec_prepare; + shader->base.run = exec_run; + shader->base.delete = exec_delete; + + return &shader->base; +} + diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c new file mode 100644 index 0000000000..34adac5226 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -0,0 +1,200 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + * Zack Rusin + */ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_sse2.h" + +#if 0 + +struct sp_llvm_fragment_shader { + struct sp_fragment_shader base; + struct gallivm_prog *llvm_prog; +}; + +static void +shade_quad_llvm(struct quad_stage *qs, + struct quad_header *quad) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + float dests[4][16][4] ALIGN16_ATTRIB; + float inputs[4][16][4] ALIGN16_ATTRIB; + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + struct gallivm_prog *llvm = qss->llvm_prog; + + inputs[0][0][0] = fx; + inputs[1][0][0] = fx + 1.0f; + inputs[2][0][0] = fx; + inputs[3][0][0] = fx + 1.0f; + + inputs[0][0][1] = fy; + inputs[1][0][1] = fy; + inputs[2][0][1] = fy + 1.0f; + inputs[3][0][1] = fy + 1.0f; + + + gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); + +#if DLLVM + debug_printf("MASK = %d\n", quad->mask); + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 2; ++j) { + debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, + inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); + } + } +#endif + + quad->mask &= + gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + qss->samplers); +#if DLLVM + debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", + dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], + dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); +#endif + + /* store result color */ + if (qss->colorOutSlot >= 0) { + unsigned i; + /* XXX need to handle multiple color outputs someday */ + allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + for (i = 0; i < QUAD_SIZE; ++i) { + quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; + quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; + quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; + quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; + } + } +#if DLLVM + for (int i = 0; i < QUAD_SIZE; ++i) { + debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, + quad->outputs.color[0][0][i], + quad->outputs.color[0][1][i], + quad->outputs.color[0][2][i], + quad->outputs.color[0][3][i]); + } +#endif + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = dests[i][0][2]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = inputs[i][0][2]; + } + } +#if DLLVM + debug_printf("D [%f, %f, %f, %f] mask = %d\n", + quad->outputs.depth[0], + quad->outputs.depth[1], + quad->outputs.depth[2], + quad->outputs.depth[3], quad->mask); +#endif + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + + +unsigned +run_llvm_fs( const struct sp_fragment_shader *base, + struct foo *machine ) +{ +} + + +void +delete_llvm_fs( struct sp_fragment_shader *base ) +{ + FREE(base); +} + + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + struct sp_llvm_fragment_shader *shader = 
NULL; + + /* LLVM fragment shaders currently disabled: + */ + state = CALLOC_STRUCT(sp_llvm_shader_state); + if (!state) + return NULL; + + state->llvm_prog = 0; + + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); + + if (shader) { + shader->base.run = run_llvm_fs; + shader->base.delete = delete_llvm_fs; + } + + return shader; +} + + +#else + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + return NULL; +} + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c new file mode 100644 index 0000000000..9a273c8764 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -0,0 +1,169 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" +#include "sp_headers.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" + + +#if defined(PIPE_ARCH_X86) + +#include "rtasm/rtasm_x86sse.h" + +/* Surely this should be defined somewhere in a tgsi header: + */ +typedef void (PIPE_CDECL *codegen_function)( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + const float (*constant)[4], + struct tgsi_exec_vector *temporary, + const struct tgsi_interp_coef *coef, + float (*immediates)[4] + //, const struct tgsi_exec_vector *quadPos + ); + + +struct sp_sse_fragment_shader { + struct sp_fragment_shader base; + struct x86_function sse2_program; + codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; +}; + + + +static void +fs_sse_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers ) +{ +} + + +/* TODO: codegenerate the whole run function, skip this wrapper. 
+ * TODO: break dependency on tgsi_exec_machine struct + * TODO: push Position calculation into the generated shader + * TODO: process >1 quad at a time + */ +static unsigned +fs_sse_run( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct quad_header *quad ) +{ + struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; + + /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ + sp_setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + machine->Temps); + + /* init kill mask */ + tgsi_set_kill_mask(machine, 0x0); + tgsi_set_exec_mask(machine, 1, 1, 1, 1); + + shader->func( machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps, + machine->InterpCoefs, + shader->immediates + // , &machine->QuadPos + ); + + return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); +} + + +static void +fs_sse_delete( struct sp_fragment_shader *base ) +{ + struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; + + x86_release_func( &shader->sse2_program ); + FREE(shader); +} + + +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + struct sp_sse_fragment_shader *shader; + + if (!softpipe->use_sse) + return NULL; + + shader = CALLOC_STRUCT(sp_sse_fragment_shader); + if (!shader) + return NULL; + + x86_init_func( &shader->sse2_program ); + + if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, + shader->immediates, FALSE )) { + FREE(shader); + return NULL; + } + + shader->func = (codegen_function) x86_get_func( &shader->sse2_program ); + if (!shader->func) { + x86_release_func( &shader->sse2_program ); + FREE(shader); + return NULL; + } + + shader->base.shader = *templ; + shader->base.prepare = fs_sse_prepare; + shader->base.run = fs_sse_run; + shader->base.delete = fs_sse_delete; + + return &shader->base; +} + + +#else + +/* Maybe put this varient in the header file. + */ +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + return NULL; +} + +#endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h new file mode 100644 index 0000000000..4a42cb3c19 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_HEADERS_H +#define SP_HEADERS_H + +#include "pipe/p_state.h" +#include "tgsi/tgsi_exec.h" + +#define PRIM_POINT 1 +#define PRIM_LINE 2 +#define PRIM_TRI 3 + + +/* The rasterizer generates 2x2 quads of fragment and feeds them to + * the current fp_machine (see below). + * Remember that Y=0=top with Y increasing down the window. + */ +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +/** + * Encodes everything we need to know about a 2x2 pixel block. Uses + * "Channel-Serial" or "SoA" layout. + */ +struct quad_header_input +{ + int x0; + int y0; + float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ + unsigned facing:1; /**< Front (0) or back (1) facing? */ + unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ +}; + +struct quad_header_inout +{ + unsigned mask:4; +}; + +struct quad_header_output +{ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + float depth[QUAD_SIZE]; +}; + +struct quad_header { + struct quad_header_input input; + struct quad_header_inout inout; + struct quad_header_output output; + + const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; + + unsigned nr_attrs; +}; + +#endif /* SP_HEADERS_H */ + diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c new file mode 100644 index 0000000000..038ff04d4f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * \brief A draw stage that drives our triangle setup routines from + * within the draw pipeline. One of two ways to drive setup, the + * other being in sp_prim_vbuf.c. + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + + +#include "sp_context.h" +#include "sp_setup.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_pipe.h" +#include "draw/draw_vertex.h" +#include "util/u_memory.h" + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct setup_context *setup; +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +typedef const float (*cptrf4)[4]; + +static void +do_tri(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + + setup_tri( setup->setup, + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data, + (cptrf4)prim->v[2]->data ); +} + +static void +do_line(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + + setup_line( setup->setup, + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data ); +} + +static void +do_point(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + + setup_point( setup->setup, + (cptrf4)prim->v[0]->data ); +} + + + + +static void setup_begin( struct draw_stage *stage ) +{ + struct setup_stage *setup = setup_stage(stage); + + setup_prepare( setup->setup ); + + stage->point = do_point; + stage->line = do_line; + stage->tri = do_tri; +} + + +static void setup_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->point( stage, header ); +} + +static void setup_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->line( stage, header ); +} + + +static void setup_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->tri( stage, header ); +} + + + +static void setup_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->point = setup_first_point; + stage->line = setup_first_line; + stage->tri = setup_first_tri; +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + + +static void render_destroy( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + setup_destroy_context(ssetup->setup); + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. 
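+ *
+ * This is the draw-pipeline way of driving setup (the other being the
+ * vbuf path in sp_prim_vbuf.c, as noted at the top of this file): the
+ * returned stage routes points/lines/tris from the draw module into
+ * setup_point(), setup_line() and setup_tri() via the do_* callbacks
+ * above.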
+ */ +struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) +{ + struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); + + sstage->setup = setup_create_context(softpipe); + sstage->stage.draw = softpipe->draw; + sstage->stage.point = setup_first_point; + sstage->stage.line = setup_first_line; + sstage->stage.tri = setup_first_tri; + sstage->stage.flush = setup_flush; + sstage->stage.reset_stipple_counter = reset_stipple_counter; + sstage->stage.destroy = render_destroy; + + return (struct draw_stage *)sstage; +} + +struct setup_context * +sp_draw_setup_context( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + return ssetup->setup; +} + +void +sp_draw_flush( struct draw_stage *stage ) +{ + stage->flush( stage, 0 ); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h new file mode 100644 index 0000000000..49bdd98ed8 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SP_PRIM_SETUP_H +#define SP_PRIM_SETUP_H + + +/** + * vbuf is a special stage to gather the stream of triangles, lines, points + * together and reconstruct vertex buffers for hardware upload. + * + * First attempt, work in progress. + * + * TODO: + * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. + * - tell vbuf stage how to build hw vertices directly + * - pass vbuf stage a buffer pointer for direct emit to agp/vram. + * + * + * + * Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. 
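+ *
+ * For example, with just a window position and one color attribute
+ * enabled, each vertex would be 8 packed floats:
+ *
+ *    float vertex[8];   -- [0..3] = x,y,z,w,  [4..7] = r,g,b,a
+ *
+ * and consumers would step through the buffer with a stride of
+ * 8 * sizeof(float).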
+ */ + + +struct draw_stage; +struct softpipe_context; + + +typedef void (*vbuf_draw_func)( struct pipe_context *pipe, + unsigned prim, + const ushort *elements, + unsigned nr_elements, + const void *vertex_buffer, + unsigned nr_vertices ); + + +extern struct draw_stage * +sp_draw_render_stage( struct softpipe_context *softpipe ); + +extern struct setup_context * +sp_draw_setup_context( struct draw_stage * ); + +extern void +sp_draw_flush( struct draw_stage * ); + + +extern struct draw_stage * +sp_draw_vbuf_stage( struct draw_context *draw_context, + struct pipe_context *pipe, + vbuf_draw_func draw ); + + +#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c new file mode 100644 index 0000000000..9cd5784e5b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -0,0 +1,410 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Interface between 'draw' module's output and the softpipe rasterizer/setup + * code. When the 'draw' module has finished filling a vertex buffer, the + * draw_arrays() functions below will be called. Loop over the vertices and + * call the point/line/tri setup functions. + * + * Authors + * Brian Paul + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_prim_vbuf.h" +#include "sp_prim_setup.h" +#include "sp_setup.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + + +#define SP_MAX_VBUF_INDEXES 1024 +#define SP_MAX_VBUF_SIZE 4096 + +typedef const float (*cptrf4)[4]; + +/** + * Subclass of vbuf_render. 
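+ * The 'base' member must stay first so that the vbuf_render pointer
+ * handed back by the draw module can simply be cast to
+ * softpipe_vbuf_render, as the cast wrapper below does.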
+ */ +struct softpipe_vbuf_render +{ + struct vbuf_render base; + struct softpipe_context *softpipe; + uint prim; + uint vertex_size; + void *vertex_buffer; +}; + + +/** cast wrapper */ +static struct softpipe_vbuf_render * +softpipe_vbuf_render(struct vbuf_render *vbr) +{ + return (struct softpipe_vbuf_render *) vbr; +} + + +static const struct vertex_info * +sp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + return softpipe_get_vbuf_vertex_info(cvbr->softpipe); +} + + +static void * +sp_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + assert(!cvbr->vertex_buffer); + cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + align_free(vertices); + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + +static boolean +sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. + */ + struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup); + + setup_prepare( setup_ctx ); + + + + cvbr->prim = prim; + return TRUE; + +} + + +static INLINE cptrf4 get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (cptrf4)((char *)vertex_buffer + index * stride); +} + + +/** + * draw elements / indexed primitives + */ +static void +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + const void *vertex_buffer = cvbr->vertex_buffer; + unsigned i; + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. 
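+    *
+    * Note: in the TRIANGLE_STRIP case below, the (i&1) terms swap the
+    * first two vertices on every other triangle so that all triangles
+    * keep the same winding, e.g. i=2 -> (0,1,2), i=3 -> (2,1,3),
+    * i=4 -> (2,3,4).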
+ */ + struct draw_stage *setup = softpipe->setup; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); + + switch (cvbr->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup_point( setup_ctx, + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUADS: + for (i = 3; i < nr; i += 4) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 3; i < nr; i += 2) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + default: + assert(0); + } + + /* XXX: why are we calling this??? If we had to call something, it + * would be a function in sp_setup.c: + */ + sp_draw_flush( setup ); +} + + +/** + * This function is hit when the draw module is working in pass-through mode. + * It's up to us to convert the vertex array into point/line/tri prims. + */ +static void +sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + const void *vertex_buffer = + (void *) get_vert(cvbr->vertex_buffer, start, stride); + unsigned i; + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. 
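+    *
+    * Note: there is no index list here; vertex_buffer was already offset
+    * by 'start' via get_vert() above, so the loops below index from 0.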
+ */ + struct draw_stage *setup = softpipe->setup; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); + + switch (cvbr->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup_point( setup_ctx, + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i+(i&1)-2, stride), + get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + case PIPE_PRIM_QUADS: + for (i = 3; i < nr; i += 4) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 3; i < nr; i += 2) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + default: + assert(0); + } +} + + + +static void +sp_vbuf_destroy(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + cvbr->softpipe->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. 
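 * (For orientation: the callbacks defined above are plugged into the draw
 * module's vbuf stage here, which then drives them roughly in the order
 * allocate_vertices -> set_primitive -> draw / draw_arrays ->
 * release_vertices for each batch of post-transformed vertices.)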
+ */ +void +sp_init_vbuf(struct softpipe_context *sp) +{ + assert(sp->draw); + + sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + + sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; + sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; + + sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; + sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; + sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; + sp->vbuf_render->base.draw = sp_vbuf_draw; + sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; + sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; + sp->vbuf_render->base.destroy = sp_vbuf_destroy; + + sp->vbuf_render->softpipe = sp; + + sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + + draw_set_rasterize_stage(sp->draw, sp->vbuf); + + draw_set_render(sp->draw, &sp->vbuf_render->base); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h new file mode 100644 index 0000000000..1de9cc2a89 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_VBUF_H +#define SP_VBUF_H + + +struct softpipe_context; + +extern void +sp_init_vbuf(struct softpipe_context *softpipe); + + +#endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c new file mode 100644 index 0000000000..892ef87ee9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/p_shader_tokens.h" + +static void +sp_push_quad_first( + struct softpipe_context *sp, + struct quad_stage *quad, + uint i ) +{ + quad->next = sp->quad[i].first; + sp->quad[i].first = quad; +} + +static void +sp_build_depth_stencil( + struct softpipe_context *sp, + uint i ) +{ + if (sp->depth_stencil->stencil[0].enabled || + sp->depth_stencil->stencil[1].enabled) { + sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); + } + else if (sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf) { + sp_push_quad_first( sp, sp->quad[i].depth_test, i ); + } +} + +void +sp_build_quad_pipeline(struct softpipe_context *sp) +{ + uint i; + + boolean early_depth_test = + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + !sp->fs->info.uses_kill && + !sp->fs->info.writes_z; + + /* build up the pipeline in reverse order... */ + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first = sp->quad[i].output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad[i].colormask, i ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad[i].blend, i ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad[i].occlusion, i ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad[i].coverage, i ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp, i ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); + } + + /* XXX always enable shader? 
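       * The shade stage is pushed unconditionally just below, so every quad
       * runs the fragment shader.  Since sp_push_quad_first() prepends,
       * stages pushed later run earlier; with a typical state vector (depth
       * test and blending enabled, no alpha test or smoothing) quads end up
       * flowing roughly:
       *
       *    shade -> depth/stencil test -> blend -> output
       *
       * with the colormask, coverage and occlusion stages inserted only when
       * the corresponding state is enabled.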
*/ + if (1) { + sp_push_quad_first( sp, sp->quad[i].shade, i ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp, i ); + sp_push_quad_first( sp, sp->quad[i].earlyz, i ); + } + +#if !USE_DRAW_STAGE_PSTIPPLE + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); + } +#endif + } +} + diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h new file mode 100644 index 0000000000..08513cb95f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_QUAD_H +#define SP_QUAD_H + + +struct softpipe_context; +struct quad_header; + + +struct quad_stage { + struct softpipe_context *softpipe; + + struct quad_stage *next; + + void (*begin)(struct quad_stage *qs); + + /** the stage action */ + void (*run)(struct quad_stage *qs, struct quad_header *quad); + + void (*destroy)(struct quad_stage *qs); +}; + + +struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); + +void sp_build_quad_pipeline(struct softpipe_context *sp); + +void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + +#endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c new file mode 100644 index 0000000000..5bebd141e9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -0,0 +1,108 @@ + +/** + * quad alpha test + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +static void +alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + const float ref = softpipe->depth_stencil->alpha.ref; + unsigned passMask = 0x0, j; + const uint cbuf = 0; /* only output[0].alpha is tested */ + const float *aaaa = quad->output.color[cbuf][3]; + + switch (softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_NEVER: + break; + case PIPE_FUNC_LESS: + /* + * If mask were an array [4] we could do this SIMD-style: + * passMask = (quad->outputs.color[0][3] <= vec4(ref)); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] < ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] == ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] <= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] > ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] != ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] >= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + quad->inout.mask &= passMask; + + if (quad->inout.mask) + qs->next->run(qs->next, quad); +} + + +static void alpha_test_begin(struct quad_stage *qs) +{ + 
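   /* The quad_stage interface declared in sp_quad.h is what each of these
    * stages implements; a minimal pass-through stage (hypothetical, shown
    * only for illustration) would look like:
    *
    *    static void noop_run(struct quad_stage *qs, struct quad_header *quad)
    *    {
    *       qs->next->run(qs->next, quad);
    *    }
    *
    * begin() hooks likewise just cascade down the chain, as here; only
    * stages with real per-primitive setup (e.g. the shader stage) do more.
    */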
qs->next->begin(qs->next); +} + + +static void alpha_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = alpha_test_begin; + stage->run = alpha_test_quad; + stage->destroy = alpha_test_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c new file mode 100644 index 0000000000..6f64c6e584 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -0,0 +1,759 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * quad blending + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" + + +#define VEC4_COPY(DST, SRC) \ +do { \ + DST[0] = SRC[0]; \ + DST[1] = SRC[1]; \ + DST[2] = SRC[2]; \ + DST[3] = SRC[3]; \ +} while(0) + +#define VEC4_SCALAR(DST, SRC) \ +do { \ + DST[0] = SRC; \ + DST[1] = SRC; \ + DST[2] = SRC; \ + DST[3] = SRC; \ +} while(0) + +#define VEC4_ADD(R, A, B) \ +do { \ + R[0] = A[0] + B[0]; \ + R[1] = A[1] + B[1]; \ + R[2] = A[2] + B[2]; \ + R[3] = A[3] + B[3]; \ +} while (0) + +#define VEC4_SUB(R, A, B) \ +do { \ + R[0] = A[0] - B[0]; \ + R[1] = A[1] - B[1]; \ + R[2] = A[2] - B[2]; \ + R[3] = A[3] - B[3]; \ +} while (0) + +#define VEC4_MUL(R, A, B) \ +do { \ + R[0] = A[0] * B[0]; \ + R[1] = A[1] * B[1]; \ + R[2] = A[2] * B[2]; \ + R[3] = A[3] * B[3]; \ +} while (0) + +#define VEC4_MIN(R, A, B) \ +do { \ + R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ +} while (0) + +#define VEC4_MAX(R, A, B) \ +do { \ + R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] > B[3]) ? 
A[3] : B[3]; \ +} while (0) + + + +static void +logicop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ + } + + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = ~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } + + for (j = 0; j < 4; j++) { + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); + } + } + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + + + +static void +blend_quad(struct quad_stage *qs, struct quad_header *quad) +{ + static const float zero[4] = { 0, 0, 0, 0 }; + static const float one[4] = { 1, 1, 1, 1 }; + + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); 
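      /* Logic ops replace blending entirely: when logicop_enable is set the
       * blend factor/function code below is never reached.
       */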
+ return; + } + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(source[0], quadColor[0], temp); /* R */ + VEC4_MUL(source[1], quadColor[1], temp); /* G */ + VEC4_MUL(source[2], quadColor[2], temp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + 
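      /* INV_SRC_ALPHA scales by (1 - src.alpha).  For reference, classic
       * "over" compositing uses SRC_ALPHA as the source factor and
       * INV_SRC_ALPHA as the destination factor:
       *    rgb = src.rgb * src.a + dst.rgb * (1 - src.a)
       */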
{ + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); + } + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + 
assert(0); + } + + + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? 
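       * (SRC1 factors refer to dual-source blending, where the fragment
       * shader exports a second color that is used as a blend factor;
       * softpipe does not implement it, hence the assert below.)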
*/ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? 
*/ + assert(0); + break; + default: + assert(0); + } + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); + } + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); + } + + } /* cbuf loop */ + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void blend_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void blend_destroy(struct quad_stage 
*qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = blend_begin; + stage->run = blend_quad; + stage->destroy = blend_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c new file mode 100644 index 0000000000..92e9af09c1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -0,0 +1,74 @@ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + + +/** + * Loop over colorbuffers, passing quad to next stage each time. + */ +static void +cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; + unsigned i; + + assert(sizeof(quad->outputs.color) == sizeof(tmp)); + assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS); + + /* make copy of original colors since they can get modified + * by blending and masking. + * XXX we won't have to do this if the fragment program actually emits + * N separate colors and we're drawing to N color buffers (MRT). + * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is + * in effect, we need to save/restore colors like this. + */ + memcpy(tmp, quad->outputs.color, sizeof(tmp)); + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + /* set current cbuffer */ +#if 0 /* obsolete & going away */ + softpipe->current_cbuf = i; +#endif + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); + + /* restore quad's colors for next buffer */ + memcpy(quad->outputs.color, tmp, sizeof(tmp)); + } +} + + +static void cbuf_loop_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void cbuf_loop_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +/** + * Create the colorbuffer loop stage. + * This is used to implement multiple render targets and GL_FRONT_AND_BACK + * rendering. + */ +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = cbuf_loop_begin; + stage->run = cbuf_loop_quad; + stage->destroy = cbuf_loop_destroy; + + return stage; +} + diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c new file mode 100644 index 0000000000..f32bdfab78 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief quad colormask stage + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + + +/** + * XXX colormask could be rolled into blending... + */ +static void +colormask_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* R */ + if (!(softpipe->blend->colormask & PIPE_MASK_R)) + COPY_4V(quadColor[0], dest[0]); + + /* G */ + if (!(softpipe->blend->colormask & PIPE_MASK_G)) + COPY_4V(quadColor[1], dest[1]); + + /* B */ + if (!(softpipe->blend->colormask & PIPE_MASK_B)) + COPY_4V(quadColor[2], dest[2]); + + /* A */ + if (!(softpipe->blend->colormask & PIPE_MASK_A)) + COPY_4V(quadColor[3], dest[3]); + } + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void colormask_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void colormask_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = colormask_begin; + stage->run = colormask_quad; + stage->destroy = colormask_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c new file mode 100644 index 0000000000..ee29aa7dfe --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * \brief Apply AA coverage to quad alpha values
+ * \author Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+
+
+/**
+ * Multiply quad's alpha values by the fragment coverage.
+ */
+static void
+coverage_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+   struct softpipe_context *softpipe = qs->softpipe;
+
+   if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) ||
+       (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) ||
+       (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) {
+      uint cbuf;
+
+      /* loop over colorbuffer outputs */
+      for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+         float (*quadColor)[4] = quad->output.color[cbuf];
+         unsigned j;
+         for (j = 0; j < QUAD_SIZE; j++) {
+            assert(quad->input.coverage[j] >= 0.0);
+            assert(quad->input.coverage[j] <= 1.0);
+            quadColor[3][j] *= quad->input.coverage[j];
+         }
+      }
+   }
+
+   qs->next->run(qs->next, quad);
+}
+
+
+static void coverage_begin(struct quad_stage *qs)
+{
+   qs->next->begin(qs->next);
+}
+
+
+static void coverage_destroy(struct quad_stage *qs)
+{
+   FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe )
+{
+   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+   stage->softpipe = softpipe;
+   stage->begin = coverage_begin;
+   stage->run = coverage_quad;
+   stage->destroy = coverage_destroy;
+
+   return stage;
+} diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c new file mode 100644 index 0000000000..523bd3e080 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -0,0 +1,290 @@ +/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad depth testing + */ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Do depth testing for a quad. + * Not static since it's used by the stencil code. + */ + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +void +sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + const enum pipe_format format = ps->format; + unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ + unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ + unsigned zmask = 0; + unsigned j; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + + assert(ps); /* shouldn't get here if there's no zbuffer */ + + /* + * Convert quad's float depth values to int depth values (qzzzz). + * If the Z buffer stores integer values, we _have_ to do the depth + * compares with integers (not floats). Otherwise, the float->int->float + * conversion of Z values (which isn't an identity function) will cause + * Z-fighting errors. + * + * Also, get the zbuffer values (bzzzz) from the cached tile. 
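 * For example, with a PIPE_FORMAT_Z16_UNORM buffer the scale is 65535.0,
 * so a fragment depth of 0.5 becomes qzzzz[j] = 32767 and is compared
 * directly against the 16-bit bzzzz[j] read from tile->data.depth16[y][x].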
+ */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + { + float scale = 65535.0; + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth16[y][x]; + } + } + break; + case PIPE_FORMAT_Z32_UNORM: + { + double scale = (double) (uint) ~0UL; + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x]; + } + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + /* fall-through */ + case PIPE_FORMAT_S8Z24_UNORM: + { + float scale = (float) ((1 << 24) - 1); + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; + } + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + /* fall-through */ + case PIPE_FORMAT_Z24S8_UNORM: + { + float scale = (float) ((1 << 24) - 1); + + for (j = 0; j < QUAD_SIZE; j++) { + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); + } + + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + bzzzz[j] = tile->data.depth32[y][x] >> 8; + } + } + break; + default: + assert(0); + } + + switch (softpipe->depth_stencil->depth.func) { + case PIPE_FUNC_NEVER: + /* zmask = 0 */ + break; + case PIPE_FUNC_LESS: + /* Note this is pretty much a single sse or cell instruction. 
+ * Like this: quad->mask &= (quad->outputs.depth < zzzz); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] < bzzzz[j]) + zmask |= 1 << j; + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] == bzzzz[j]) + zmask |= 1 << j; + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] <= bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] > bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] != bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (qzzzz[j] >= bzzzz[j]) + zmask |= (1 << j); + } + break; + case PIPE_FUNC_ALWAYS: + zmask = MASK_ALL; + break; + default: + assert(0); + } + + quad->inout.mask &= zmask; + + if (softpipe->depth_stencil->depth.writemask) { + + /* This is also efficient with sse / spe instructions: + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + bzzzz[j] = qzzzz[j]; + } + } + + /* put updated Z values back into cached tile */ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth16[y][x] = (ushort) bzzzz[j]; + } + break; + case PIPE_FORMAT_X8Z24_UNORM: + /* fall-through */ + /* (yes, this falls through to a different case than above) */ + case PIPE_FORMAT_Z32_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = bzzzz[j]; + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); + tile->data.depth32[y][x] = z24s8; + } + break; + case PIPE_FORMAT_Z24X8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.depth32[y][x] = bzzzz[j] << 8; + } + break; + default: + assert(0); + } + } +} + + +static void +depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + sp_depth_test_quad(qs, quad); + + if (quad->inout.mask) + qs->next->run(qs->next, quad); +} + + +static void depth_test_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void depth_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = depth_test_begin; + stage->run = depth_test_quad; + stage->destroy = depth_test_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c new file mode 100644 index 0000000000..6e2dde304e --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -0,0 +1,88 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad early-z testing + */ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * All this stage does is compute the quad's Z values (which is normally + * done by the shading stage). + * The next stage will do the actual depth test. + */ +static void +earlyz_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; + + qs->next->run( qs->next, quad ); +} + +static void +earlyz_begin( + struct quad_stage *qs ) +{ + qs->next->begin( qs->next ); +} + +static void +earlyz_destroy( + struct quad_stage *qs ) +{ + FREE( qs ); +} + +struct quad_stage * +sp_quad_earlyz_stage( + struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); + + stage->softpipe = softpipe; + stage->begin = earlyz_begin; + stage->run = earlyz_quad; + stage->destroy = earlyz_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c new file mode 100644 index 0000000000..5dacbbe55f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -0,0 +1,189 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; /**< base class */ + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + boolean z_written; + + /* Consts do not require 16 byte alignment. 
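    * (unlike the machine Inputs/Outputs, which are align16()'d when this
    * stage is created in sp_quad_shade_stage() below)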
*/ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* run shader */ + quad->inout.mask &= softpipe->fs->run( softpipe->fs, + &qss->machine, + quad ); + + /* store outputs */ + z_written = FALSE; + { + const ubyte *sem_name = softpipe->fs->info.output_semantic_name; + const ubyte *sem_index = softpipe->fs->info.output_semantic_index; + const uint n = qss->stage.softpipe->fs->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->output.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->output.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + z_written = TRUE; + } + break; + } + } + } + + if (!z_written) { + /* compute Z values now, as in the quad earlyz stage */ + /* XXX we should really only do this if the earlyz stage is not used */ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; + } + + /* shader may cull fragments */ + if( quad->inout.mask ) { + qs->next->run( qs->next, quad ); + } +} + +/** + * Per-primitive (or per-begin?) setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + + softpipe->fs->prepare( softpipe->fs, + &qss->machine, + (struct tgsi_sampler **) + softpipe->tgsi.frag_samplers_list ); + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; + qss->stage.run = shade_quad; + qss->stage.destroy = shade_destroy; + + tgsi_exec_machine_init( &qss->machine ); + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c new file mode 100644 index 0000000000..169bd82876 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \brief Quad occlusion counter stage + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + +static unsigned count_bits( unsigned val ) +{ + unsigned i; + + for (i = 0; val ; val >>= 1) + i += (val & 1); + + return i; +} + +static void +occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + + softpipe->occlusion_count += count_bits(quad->inout.mask); + + qs->next->run(qs->next, quad); +} + + +static void occlusion_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void occlusion_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = occlusion_begin; + stage->run = occlusion_count_quad; + stage->destroy = occlusion_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c new file mode 100644 index 0000000000..b7aac7f84a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Last step of quad processing: write quad colors to the framebuffer, + * taking mask into account. + */ +static void +output_quad(struct quad_stage *qs, struct quad_header *quad) +{ + /* in-tile pos: */ + const int itx = quad->input.x0 % TILE_SIZE; + const int ity = quad->input.y0 % TILE_SIZE; + + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + int i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + if (0) { + debug_printf("sp write pixel %d,%d: %g, %g, %g\n", + quad->input.x0 + x, + quad->input.y0 + y, + quadColor[0][j], + quadColor[1][j], + quadColor[2][j]); + } + } + } + } +} + + +static void output_begin(struct quad_stage *qs) +{ + assert(qs->next == NULL); +} + + +static void output_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = output_begin; + stage->run = output_quad; + stage->destroy = output_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c new file mode 100644 index 0000000000..abb5487748 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -0,0 +1,352 @@ + +/** + * \brief Quad stencil testing + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. 
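+ * Note that each comparison below is applied as (ref FUNC stored value),
+ * with both operands first masked by valMask; e.g. with func=PIPE_FUNC_LESS,
+ * valMask=0xff, ref=0x10 and a stored value of 0x20, the pixel passes
+ * (0x10 < 0x20).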
+ * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(ubyte stencilVals[QUAD_SIZE], + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] < STENCIL_MAX) { + newstencil[j] = stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] > 0) { + newstencil[j] = stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~stencilVals[j]; + } + } + break; + default: + assert(0); + } + + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); + } + } + else { + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = newstencil[j]; + } + } +} + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. 
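+ *
+ * The sequence below is: read the stencil values from the cached tile,
+ * apply the stencil test (applying fail_op to pixels that fail), then run
+ * the depth test and apply zfail_op/zpass_op accordingly, and finally
+ * write the updated stencil values back into the tile.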
+ */ +static void +stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + uint j; + uint face = quad->input.facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].write_mask; + valMask = softpipe->depth_stencil->stencil[face].value_mask; + + assert(ps); /* shouldn't get here if there's no stencil buffer */ + + /* get stencil values from cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + default: + assert(0); + } + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(stencilVals, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + } + } + + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; + + sp_depth_test_quad(qs, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->inout.mask; + apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); + } + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->inout.mask; + apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + } + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); + } + + } + + /* put new stencil values into cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); + tile->data.depth32[y][x] = s8z24; + } + break; + case 
PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; + tile->data.depth32[y][x] = z24s8; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = stencilVals[j]; + } + break; + default: + assert(0); + } + + if (quad->inout.mask) + qs->next->run(qs->next, quad); +} + + +static void stencil_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stencil_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stencil_begin; + stage->run = stencil_test_quad; + stage->destroy = stencil_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c new file mode 100644 index 0000000000..ccf37f6be5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -0,0 +1,94 @@ + +/** + * quad polygon stipple stage + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +/** + * Apply polygon stipple to quads produced by triangle rasterization + */ +static void +stipple_quad(struct quad_stage *qs, struct quad_header *quad) +{ + static const uint bit31 = 1 << 31; + static const uint bit30 = 1 << 30; + + if (quad->input.prim == PRIM_TRI) { + struct softpipe_context *softpipe = qs->softpipe; + /* need to invert Y to index into OpenGL's stipple pattern */ + int y0, y1; + uint stipple0, stipple1; + if (softpipe->rasterizer->origin_lower_left) { + y0 = softpipe->framebuffer.height - 1 - quad->input.y0; + y1 = y0 - 1; + } + else { + y0 = quad->input.y0; + y1 = y0 + 1; + } + stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + +#if 1 + { + const int col0 = quad->input.x0 % 32; + if ((stipple0 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_LEFT; + + if ((stipple0 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_RIGHT; + + if ((stipple1 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_LEFT; + + if ((stipple1 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + } +#else + /* We'd like to use this code, but we'd need to redefine + * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0), + * and similarly for the BOTTOM bits. But that may have undesirable + * side effects elsewhere. 
+ */ + const int col0 = 30 - (quad->input.x0 % 32); + quad->inout.mask &= (((stipple0 >> col0) & 0x3) | + (((stipple1 >> col0) & 0x3) << 2)); +#endif + if (!quad->inout.mask) + return; + } + + qs->next->run(qs->next, quad); +} + + +static void stipple_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stipple_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stipple_begin; + stage->run = stipple_quad; + stage->destroy = stipple_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c new file mode 100644 index 0000000000..b0d8e01426 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_query.h" + +struct softpipe_query { + uint64_t start; + uint64_t end; +}; + + +static struct softpipe_query *softpipe_query( struct pipe_query *p ) +{ + return (struct softpipe_query *)p; +} + +static struct pipe_query * +softpipe_create_query(struct pipe_context *pipe, + unsigned type) +{ + assert(type == PIPE_QUERY_OCCLUSION_COUNTER); + return (struct pipe_query *)CALLOC_STRUCT( softpipe_query ); +} + + +static void +softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + FREE(q); +} + + +static void +softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->start = softpipe->occlusion_count; +} + + +static void +softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->end = softpipe->occlusion_count; +} + + +static boolean +softpipe_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result ) +{ + struct softpipe_query *sq = softpipe_query(q); + *result = sq->end - sq->start; + return TRUE; +} + + +void softpipe_init_query_funcs(struct softpipe_context *softpipe ) +{ + softpipe->pipe.create_query = softpipe_create_query; + softpipe->pipe.destroy_query = softpipe_destroy_query; + softpipe->pipe.begin_query = softpipe_begin_query; + softpipe->pipe.end_query = softpipe_end_query; + softpipe->pipe.get_query_result = softpipe_get_query_result; +} + + diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h new file mode 100644 index 0000000000..05060a4575 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#ifndef SP_QUERY_H +#define SP_QUERY_H + +struct softpipe_context; +extern void softpipe_init_query_funcs(struct softpipe_context * ); + + +#endif /* SP_QUERY_H */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c new file mode 100644 index 0000000000..11b08b3a82 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_screen.h" + + +static const char * +softpipe_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +softpipe_get_name(struct pipe_screen *screen) +{ + return "softpipe"; +} + + +static int +softpipe_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return PIPE_MAX_SAMPLERS; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return PIPE_MAX_SAMPLERS; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return PIPE_MAX_COLOR_BUFS; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + + +static float +softpipe_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + default: + return 0; + } +} + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return FALSE; + default: + return TRUE; + } +} + + +static void +softpipe_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no softpipe_screen). 
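+ * (In fact the code below allocates a struct softpipe_screen wrapper,
+ * defined in sp_screen.h, so the note above is out of date.)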
+ */ +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *winsys) +{ + struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen); + + if (!screen) + return NULL; + + screen->base.winsys = winsys; + + screen->base.destroy = softpipe_destroy_screen; + + screen->base.get_name = softpipe_get_name; + screen->base.get_vendor = softpipe_get_vendor; + screen->base.get_param = softpipe_get_param; + screen->base.get_paramf = softpipe_get_paramf; + screen->base.is_format_supported = softpipe_is_format_supported; + + softpipe_init_screen_texture_funcs(&screen->base); + + return &screen->base; +} diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h new file mode 100644 index 0000000000..3d4bfd3e84 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SCREEN_H +#define SP_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" + + + +struct softpipe_screen { + struct pipe_screen base; + + /* Increments whenever textures are modified. Contexts can track + * this. + */ + unsigned timestamp; +}; + + + + +static INLINE struct softpipe_screen * +softpipe_screen( struct pipe_screen *pipe ) +{ + return (struct softpipe_screen *)pipe; +} + + +#endif /* SP_SCREEN_H */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c new file mode 100644 index 0000000000..13d8017393 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -0,0 +1,1569 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + +#include "sp_setup.h" + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +#define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + +#if SP_NUM_QUAD_THREADS > 1 + +/* Set to 1 if you want other threads to be instantly + * notified of pending jobs. + */ +#define INSTANT_NOTEMPTY_NOTIFY 0 + +struct thread_info +{ + struct setup_context *setup; + uint id; + pipe_thread handle; +}; + +struct quad_job; + +typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); + +struct quad_job +{ + struct quad_header_input input; + struct quad_header_inout inout; + quad_job_routine routine; +}; + +#define NUM_QUAD_JOBS 64 + +struct quad_job_que +{ + struct quad_job jobs[NUM_QUAD_JOBS]; + uint first; + uint last; + pipe_mutex que_mutex; + pipe_condvar que_notfull_condvar; + pipe_condvar que_notempty_condvar; + uint jobs_added; + uint jobs_done; + pipe_condvar que_done_condvar; +}; + +static void +add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) +{ +#if INSTANT_NOTEMPTY_NOTIFY + boolean empty; +#endif + + /* Wait for empty slot, see if the que is empty. + */ + pipe_mutex_lock( que->que_mutex ); + while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { +#if !INSTANT_NOTEMPTY_NOTIFY + pipe_condvar_broadcast( que->que_notempty_condvar ); +#endif + pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); + } +#if INSTANT_NOTEMPTY_NOTIFY + empty = que->last == que->first; +#endif + que->jobs_added++; + pipe_mutex_unlock( que->que_mutex ); + + /* Submit new job. 
+ */ + que->jobs[que->last].input = quad->input; + que->jobs[que->last].inout = quad->inout; + que->jobs[que->last].routine = routine; + que->last = (que->last + 1) % NUM_QUAD_JOBS; + +#if INSTANT_NOTEMPTY_NOTIFY + /* If the que was empty, notify consumers there's a job to be done. + */ + if (empty) { + pipe_mutex_lock( que->que_mutex ); + pipe_condvar_broadcast( que->que_notempty_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } +#endif +} + +#endif + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_context { + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const float (*vmax)[4]; + const float (*vmid)[4]; + const float (*vmin)[4]; + const float (*vprovoke)[4]; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + +#if SP_NUM_QUAD_THREADS > 1 + struct quad_job_que que; + struct thread_info threads[SP_NUM_QUAD_THREADS]; +#endif + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; + +#if DEBUG_FRAGS + uint numFragsEmitted; /**< per primitive */ + uint numFragsWritten; /**< per primitive */ +#endif + + unsigned winding; /* which winding to cull */ +}; + +#if SP_NUM_QUAD_THREADS > 1 + +static PIPE_THREAD_ROUTINE( quad_thread, param ) +{ + struct thread_info *info = (struct thread_info *) param; + struct quad_job_que *que = &info->setup->que; + + for (;;) { + struct quad_job job; + boolean full; + + /* Wait for an available job. + */ + pipe_mutex_lock( que->que_mutex ); + while (que->last == que->first) + pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); + + /* See if the que is full. + */ + full = (que->last + 1) % NUM_QUAD_JOBS == que->first; + + /* Take a job and remove it from que. + */ + job = que->jobs[que->first]; + que->first = (que->first + 1) % NUM_QUAD_JOBS; + + /* Notify the producer if the que is not full. + */ + if (full) + pipe_condvar_signal( que->que_notfull_condvar ); + pipe_mutex_unlock( que->que_mutex ); + + job.routine( info->setup, info->id, &job ); + + /* Notify the producer if that's the last finished job. + */ + pipe_mutex_lock( que->que_mutex ); + que->jobs_done++; + if (que->jobs_added == que->jobs_done) + pipe_condvar_signal( que->que_done_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } + + return NULL; +} + +#define WAIT_FOR_COMPLETION(setup) \ + do {\ + pipe_mutex_lock( setup->que.que_mutex );\ + if (!INSTANT_NOTEMPTY_NOTIFY)\ + pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ + while (setup->que.jobs_added != setup->que.jobs_done)\ + pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ + pipe_mutex_unlock( setup->que.que_mutex );\ + } while (0) + +#else + +#define WAIT_FOR_COMPLETION(setup) ((void) 0) + +#endif + +/** + * Test if x is NaN or +/- infinity. 
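+ * Works on the IEEE-754 bit pattern: the result is TRUE exactly when
+ * (bits & 0x7fffffff) >= 0x7f800000, i.e. the exponent field is all ones.
+ * For example 0x7f800000 (+Inf) and 0x7fc00000 (a NaN) return TRUE, while
+ * 0x3f800000 (1.0f) returns FALSE.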
+ */ +static INLINE boolean +is_inf_or_nan(float x) +{ + union fi tmp; + tmp.f = x; + return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); +} + + +static boolean cull_tri( struct setup_context *setup, + float det ) +{ + if (det != 0) + { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & setup->winding) == 0) + return FALSE; + } + + /* Culled: + */ + return TRUE; +} + + + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip( struct setup_context *setup, struct quad_header *quad ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (quad->input.x0 >= maxx || + quad->input.y0 >= maxy || + quad->input.x0 + 1 < minx || + quad->input.y0 + 1 < miny) { + /* totally clipped */ + quad->inout.mask = 0x0; + return; + } + if (quad->input.x0 < minx) + quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (quad->input.y0 < miny) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (quad->input.x0 == maxx - 1) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (quad->input.y0 == maxy - 1) + quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +{ + quad_clip( setup, quad ); + if (quad->inout.mask) { + struct softpipe_context *sp = setup->softpipe; + + sp->quad[thread].first->run( sp->quad[thread].first, quad ); + } +} + +#if SP_NUM_QUAD_THREADS > 1 + +static void +clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + clip_emit_quad( setup, &quad, thread ); +} + +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) + +#else + +#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) + +#endif + +/** + * Emit a quad (pass to next stage). No clipping is done. 
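+ * (Quads reaching this path are expected to already lie inside the
+ * cliprect, since flush_spans/subtriangle clamp the spans to it;
+ * clip_emit_quad is used where that does not hold.)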
+ */ +static INLINE void +emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +{ + struct softpipe_context *sp = setup->softpipe; +#if DEBUG_FRAGS + uint mask = quad->inout.mask; +#endif + +#if DEBUG_FRAGS + if (mask & 1) setup->numFragsEmitted++; + if (mask & 2) setup->numFragsEmitted++; + if (mask & 4) setup->numFragsEmitted++; + if (mask & 8) setup->numFragsEmitted++; +#endif + sp->quad[thread].first->run( sp->quad[thread].first, quad ); +#if DEBUG_FRAGS + mask = quad->inout.mask; + if (mask & 1) setup->numFragsWritten++; + if (mask & 2) setup->numFragsWritten++; + if (mask & 4) setup->numFragsWritten++; + if (mask & 8) setup->numFragsWritten++; +#endif +} + +#if SP_NUM_QUAD_THREADS > 1 + +static void +emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + emit_quad( setup, &quad, thread ); +} + +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ + } while (0) + +#else + +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + emit_quad( setup, &setup->quad, 0 );\ + } while (0) + +#endif + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_context *setup ) +{ + const int xleft0 = setup->span.left[0]; + const int xleft1 = setup->span.left[1]; + const int xright0 = setup->span.right[0]; + const int xright1 = setup->span.right[1]; + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = block(MIN2(xleft0, xleft1)); + maxright = block(MAX2(xright0, xright1)); + for (x = minleft; x <= maxright; x += 2) { + /* determine which of the four pixels is inside the span bounds */ + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + EMIT_QUAD( setup, x, setup->span.y, mask ); + } + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = block(xleft0); + maxright = block(xright0); + for (x = minleft; x <= maxright; x += 2) { + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + EMIT_QUAD( setup, x, setup->span.y, mask ); + } + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = block(xleft1); + maxright = block(xright1); + for (x = minleft; x <= maxright; x += 2) { + uint mask = 0x0; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + EMIT_QUAD( setup, x, setup->span.y, mask ); + } + break; + + default: + return; + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + + +#if DEBUG_VERTS +static void print_vertex(const struct setup_context *setup, + const float 
(*v)[4]) +{ + int i; + debug_printf(" Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v[i][0], v[i][1], v[i][2], v[i][3]); + } +} +#endif + +/** + * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise + */ +static boolean setup_sort_vertices( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0[0][1]; + float y1 = v1[0][1]; + float y2 = v2[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; + setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; + setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, det ); + */ + if (is_inf_or_nan(setup->oneoverarea)) + return FALSE; + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex[slot][i]. + * The result will be put into setup->coef[slot].a0[i]. + * \param slot which attribute slot + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. 
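+ *
+ * This solves the plane equation v(x,y) = a0 + dadx*x + dady*y through the
+ * three sorted vertices: dadx/dady come from the attribute deltas along the
+ * bottom and major edges, scaled by 1/area (setup->oneoverarea).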
+ */ +static void tri_linear_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; + float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v[0][3] is always W): + */ + float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; + float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin[vertSlot][i], + setup->vmid[vertSlot][i], + setup->vmax[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
+ */ +static void +setup_fragcoord_coeff(struct setup_context *setup, uint slot) +{ + /*X*/ + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.height; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; + } + setup->coef[slot].dadx[1] = 0.0; + /*Z*/ + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients( struct setup_context *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->attrib[fragSlot].src_index; + uint j; + + switch (vinfo->attrib[fragSlot].interp_mode) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +static void setup_tri_edges( struct setup_context *setup ) +{ + float vmin_x = setup->vmin[0][0] + 0.5f; + float vmid_x = setup->vmid[0][0] + 0.5f; + + float vmin_y = setup->vmin[0][1] - 0.5f; + float vmid_y = setup->vmid[0][1] - 0.5f; + float vmax_y = setup->vmax[0][1] - 0.5f; + + setup->emaj.sy = ceilf(vmin_y); + setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = ceilf(vmid_y); + setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = ceilf(vmin_y); + setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. 
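+ * eleft/eright are the active left and right edges for this half and
+ * 'lines' is the number of scanlines to walk.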
+ * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct setup_context *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Recalculate prim's determinant. This is needed as we don't have + * get this information through the vbuf_render interface & we must + * calculate it here. + */ +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. 
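+ * Steps: compute the determinant, cull by winding, sort the vertices by Y,
+ * compute interpolation coefficients and edges, then walk the two
+ * sub-triangles emitting spans of quads.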
+ */ +void setup_tri( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + float det; + +#if DEBUG_VERTS + debug_printf("Setup triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + if (setup->softpipe->no_rast) + return; + + det = calc_det(v0, v1, v2); + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + +#if DEBUG_FRAGS + setup->numFragsEmitted = 0; + setup->numFragsWritten = 0; +#endif + + if (cull_tri( setup, det )) + return; + + if (!setup_sort_vertices( setup, det, v0, v1, v2 )) + return; + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.input.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); + + WAIT_FOR_COMPLETION(setup); + +#if DEBUG_FRAGS + printf("Tri: %u frags emitted, %u written\n", + setup->numFragsEmitted, + setup->numFragsWritten); +#endif +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. 
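A note on line_persp_coeff() above (and on the tri_persp_coeff used by setup_tri_coefficients(), which follows the same idea): an attribute is not linear in screen space, but attribute/w is, so setup scales each vertex attribute by the position's fourth component and interpolates that linearly, and a per-fragment divide by the correspondingly interpolated term happens later in the pipeline. The sketch below shows the principle in isolation with an explicit 1/w; it is independent of the driver's plane-equation form and of exactly which reciprocal the fourth position component holds at this stage.

/* Perspective-correct interpolation of a scalar attribute between two
 * vertices, parameterized by t in [0,1] measured in screen space.
 * attr/w and 1/w are both linear across the primitive, so interpolate
 * those and divide at the end.
 */
static float
demo_persp_lerp(float t,
                float attr0, float w0,
                float attr1, float w1)
{
   const float aow = (1.0f - t) * (attr0 / w0) + t * (attr1 / w1); /* attr/w */
   const float oow = (1.0f - t) * (1.0f / w0)  + t * (1.0f / w1);  /* 1/w    */
   return aow / oow;
}

For example, halfway across a line whose endpoints have w = 1 and w = 3, an attribute running from 0 to 1 evaluates to demo_persp_lerp(0.5f, 0.0f, 1.0f, 1.0f, 3.0f) = 0.25 rather than 0.5, which is the perspective-correct result.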
+ */ +static INLINE boolean +setup_line_coefficients(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + float area; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = v1; + setup->vmin = v0; + setup->vmax = v1; + + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + + /* NOTE: this is not really area but something proportional to it */ + area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; + if (area == 0.0f || is_inf_or_nan(area)) + return FALSE; + setup->oneoverarea = 1.0f / area; + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->attrib[fragSlot].src_index; + uint j; + + switch (vinfo->attrib[fragSlot].interp_mode) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + return TRUE; +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_context *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.input.x0 || + quadY != setup->quad.input.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.input.x0 != -1) + CLIP_EMIT_QUAD(setup); + + setup->quad.input.x0 = quadX; + setup->quad.input.y0 = quadY; + setup->quad.inout.mask = 0x0; + } + + setup->quad.inout.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. 
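A note on plot() above: fragments are buffered in 2x2 quads, and the per-quad mask holds one bit per pixel at bit index (x&1) + 2*(y&1), which is what the (1 << ix) << (2 * iy) expression computes. A small self-contained restatement of that layout (demo_* is illustrative only):

/* Bit within a 2x2 quad mask for the pixel at window coords (x, y).
 * Matches the expression used in plot():  (1 << (x&1)) << (2 * (y&1)).
 *
 *          x even   x odd
 *  y even   bit 0    bit 1
 *  y odd    bit 2    bit 3
 */
static unsigned
demo_quad_mask_bit(int x, int y)
{
   return (1u << (x & 1)) << (2 * (y & 1));
}

/* Example: pixels (10,20) and (11,21) fall in the same quad, whose origin
 * is (10,20); together they set mask = 0x1 | 0x8 = 0x9.
 */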
+ */ +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + int x0 = (int) v0[0][0]; + int x1 = (int) v1[0][0]; + int y0 = (int) v0[0][1]; + int y1 = (int) v1[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + +#if DEBUG_VERTS + debug_printf("Setup line:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); +#endif + + if (setup->softpipe->no_rast) + return; + + if (dx == 0 && dy == 0) + return; + + if (!setup_line_coefficients(setup, v0, v1)) + return; + + assert(v0[0][0] < 1.0e9); + assert(v0[0][1] < 1.0e9); + assert(v1[0][0] < 1.0e9); + assert(v1[0][1] < 1.0e9); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.input.x0 = setup->quad.input.y0 = -1; + setup->quad.inout.mask = 0x0; + setup->quad.input.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. + */ + setup->quad.input.coverage[0] = + setup->quad.input.coverage[1] = + setup->quad.input.coverage[2] = + setup->quad.input.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.inout.mask) { + CLIP_EMIT_QUAD(setup); + } + + WAIT_FOR_COMPLETION(setup); +} + + +static void +point_persp_coeff(struct setup_context *setup, + const float (*vert)[4], + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert[vertSlot][i] * vert[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? v0[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0[0][0]; /* Note: data[0] is always position */ + const float y = v0[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + +#if DEBUG_VERTS + debug_printf("Setup point:\n"); + print_vertex(setup, v0); +#endif + + if (softpipe->no_rast) + return; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... 
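A note on the error terms in setup_line() above: this is the classic integer midpoint/Bresenham walk. For an x-major line the error term starts at 2*dy - dx, grows by 2*dy while staying on the same row, and by 2*(dy - dx) when also stepping in y. A stripped-down sketch of just the x-major branch, assuming dx >= dy >= 0 and positive unit steps (the demo_* names are illustrative):

/* Walk an x-major line from (x0,y0) toward +x/+y, calling plot_cb for
 * each pixel.  Same error bookkeeping as the x-major loop in setup_line().
 */
static void
demo_line_xmajor(int x0, int y0, int dx, int dy,
                 void (*plot_cb)(int x, int y))
{
   const int errorInc = dy + dy;             /* error change when y stays put */
   const int errorDec = errorInc - dx - dx;  /* error change when y steps     */
   int error = errorInc - dx;
   int i;

   for (i = 0; i < dx; i++) {
      plot_cb(x0, y0);
      x0++;
      if (error < 0) {
         error += errorInc;
      }
      else {
         error += errorDec;
         y0++;
      }
   }
}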
+ * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. + */ + setup->vprovoke = v0; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->attrib[fragSlot].src_index; + uint j; + + switch (vinfo->attrib[fragSlot].interp_mode) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.input.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.input.x0 = (int) x - ix; + setup->quad.input.y0 = (int) y - iy; + setup->quad.inout.mask = (1 << ix) << (2 * iy); + CLIP_EMIT_QUAD(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + setup->quad.inout.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - 
(dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.inout.mask) { + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; + CLIP_EMIT_QUAD(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for (ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.inout.mask = mask; + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; + CLIP_EMIT_QUAD(setup); + } + } + } + } + + WAIT_FOR_COMPLETION(setup); +} + +void setup_prepare( struct setup_context *setup ) +{ + struct softpipe_context *sp = setup->softpipe; + unsigned i; + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + /* Mark surfaces as defined now */ + for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + if (sp->framebuffer.cbufs[i]) { + sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + } + } + if (sp->framebuffer.zsbuf) { + sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + } + + /* Note: nr_attrs is only used for debugging (vertex printing) */ + setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); + + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first->begin( sp->quad[i].first ); + } + + if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && + sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && + sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { + /* we'll do culling */ + setup->winding = sp->rasterizer->cull_mode; + } + else { + /* 'draw' will do culling */ + setup->winding = PIPE_WINDING_NONE; + } +} + + + +void setup_destroy_context( struct setup_context *setup ) +{ + FREE( setup ); +} + + +/** + * Create a new primitive setup/render stage. 
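A note on the rounded-point path above: each pixel's coverage comes from its squared distance to the point center, ramping from 1 inside radius rmin to 0 at radius rmax, where rmin and rmax bracket the nominal half-size by half a pixel diagonal (0.7071 ~= sqrt(2)/2) and the ramp is linear in distance squared. A standalone restatement of that weight with the upper clamp written out (demo_* is illustrative only):

/* Coverage of a pixel whose center is at squared distance dist2 from the
 * center of a smooth point with half-size (radius) halfSize.
 * 1.0 inside rmin, 0.0 outside rmax, linear ramp in dist^2 in between.
 */
static float
demo_point_coverage(float halfSize, float dist2)
{
   const float rmin  = halfSize - 0.7071f;   /* sqrt(2)/2 */
   const float rmax  = halfSize + 0.7071f;
   const float rmin2 = rmin * rmin;          /* already non-negative, so the
                                              * driver's MAX2(0, ...) is implied */
   const float rmax2 = rmax * rmax;
   float cover;

   if (dist2 >= rmax2)
      return 0.0f;
   cover = 1.0f - (dist2 - rmin2) / (rmax2 - rmin2);
   return cover > 1.0f ? 1.0f : cover;
}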
+ */ +struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +{ + struct setup_context *setup = CALLOC_STRUCT(setup_context); +#if SP_NUM_QUAD_THREADS > 1 + uint i; +#endif + + setup->softpipe = softpipe; + + setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; + +#if SP_NUM_QUAD_THREADS > 1 + setup->que.first = 0; + setup->que.last = 0; + pipe_mutex_init( setup->que.que_mutex ); + pipe_condvar_init( setup->que.que_notfull_condvar ); + pipe_condvar_init( setup->que.que_notempty_condvar ); + setup->que.jobs_added = 0; + setup->que.jobs_done = 0; + pipe_condvar_init( setup->que.que_done_condvar ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + setup->threads[i].setup = setup; + setup->threads[i].id = i; + setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); + } +#endif + + return setup; +} + diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h new file mode 100644 index 0000000000..d54f334428 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef SP_SETUP_H +#define SP_SETUP_H + +struct setup_context; +struct softpipe_context; + +void +setup_tri( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); + +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); + +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ); + + +struct setup_context *setup_create_context( struct softpipe_context *softpipe ); +void setup_prepare( struct setup_context *setup ); +void setup_destroy_context( struct setup_context *setup ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h new file mode 100644 index 0000000000..3eff41ffa5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + + +#define SP_NEW_VIEWPORT 0x1 +#define SP_NEW_RASTERIZER 0x2 +#define SP_NEW_FS 0x4 +#define SP_NEW_BLEND 0x8 +#define SP_NEW_CLIP 0x10 +#define SP_NEW_SCISSOR 0x20 +#define SP_NEW_STIPPLE 0x40 +#define SP_NEW_FRAMEBUFFER 0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS 0x200 +#define SP_NEW_SAMPLER 0x400 +#define SP_NEW_TEXTURE 0x800 +#define SP_NEW_VERTEX 0x1000 +#define SP_NEW_VS 0x2000 +#define SP_NEW_QUERY 0x4000 + + +struct tgsi_sampler; +struct tgsi_exec_machine; +struct vertex_info; + + +/** + * Subclass of pipe_shader_state (though it doesn't really need to be). + * + * This is starting to look an awful lot like a quad pipeline stage... 
+ */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + + struct tgsi_shader_info info; + + void (*prepare)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + unsigned (*run)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad ); + + + void (*delete)( struct sp_fragment_shader * ); +}; + + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader { + struct pipe_shader_state shader; /* Note: this field not actually used */ + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_elements(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffers(struct pipe_context *, + unsigned count, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); 
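The create/bind/delete triples declared above follow the Gallium constant-state-object pattern: create() returns an immutable heap copy of the template (softpipe simply mem_dup()s it, as seen in sp_state_blend.c below), bind() stores the pointer and sets the matching SP_NEW_* dirty bit, and delete() frees the copy once it is no longer bound. A hedged usage sketch for the blend state, calling the softpipe entry points directly (normally they are reached through the pipe_context hooks) and leaving the template zeroed rather than guessing at its fields; assumes this header and <string.h> are included:

static void
demo_blend_cso_lifecycle(struct pipe_context *pipe)
{
   struct pipe_blend_state templ;
   void *cso;

   memset(&templ, 0, sizeof(templ));   /* fill in real blend settings here */

   /* create: returns an opaque, driver-owned copy of the template */
   cso = softpipe_create_blend_state(pipe, &templ);

   /* bind: cheap; just points softpipe->blend at the copy and sets SP_NEW_BLEND */
   softpipe_bind_blend_state(pipe, cso);

   /* ... draw ... */

   /* delete only after the state is no longer bound */
   softpipe_bind_blend_state(pipe, NULL);
   softpipe_delete_blend_state(pipe, cso);
}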
+boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c new file mode 100644 index 0000000000..384fe559af --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -0,0 +1,98 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_state.h" + + +void * +softpipe_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + +void softpipe_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend = (const struct pipe_blend_state *)blend; + + softpipe->dirty |= SP_NEW_BLEND; +} + +void softpipe_delete_blend_state(struct pipe_context *pipe, + void *blend) +{ + FREE( blend ); +} + + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend_color = *blend_color; + + softpipe->dirty |= SP_NEW_BLEND; +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. 
+ */ + + +void * +softpipe_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + +void +softpipe_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; +} + +void +softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE( depth ); +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c new file mode 100644 index 0000000000..4946c776e3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ +#include "sp_context.h" +#include "sp_state.h" +#include "draw/draw_context.h" + + +void softpipe_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(softpipe->draw, clip); +} + + +void softpipe_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(softpipe->draw, viewport); + + softpipe->viewport = *viewport; /* struct copy */ + softpipe->dirty |= SP_NEW_VIEWPORT; +} + + +void softpipe_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + softpipe->scissor = *scissor; /* struct copy */ + softpipe->dirty |= SP_NEW_SCISSOR; +} + + +void softpipe_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + softpipe->poly_stipple = *stipple; /* struct copy */ + softpipe->dirty |= SP_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c new file mode 100644 index 0000000000..6b6a4c3ff3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "sp_context.h" +#include "sp_state.h" + + +/** + * Mark the current vertex layout as "invalid". + * We'll validate the vertex layout later, when we start to actually + * render a point or line or tri. 
+ */ +static void +invalidate_vertex_layout(struct softpipe_context *softpipe) +{ + softpipe->vertex_info.num_attribs = 0; +} + + +/** + * The vertex info describes how to convert the post-transformed vertices + * (simple float[][4]) used by the 'draw' module into vertices for + * rasterization. + * + * This function validates the vertex layout and returns a pointer to a + * vertex_info object. + */ +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe) +{ + struct vertex_info *vinfo = &softpipe->vertex_info; + + if (vinfo->num_attribs == 0) { + /* compute vertex layout now */ + const struct sp_fragment_shader *spfs = softpipe->fs; + const enum interp_mode colorInterp + = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + uint i; + + if (softpipe->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(softpipe->draw); + uint i; + + /* No longer any need to try and emit draw vertex_header info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo_vbuf); + } + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + vinfo->num_attribs = 0; + for (i = 0; i < spfs->info.num_inputs; i++) { + int src; + switch (spfs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + src = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + break; + + case TGSI_SEMANTIC_COLOR: + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, + spfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, + spfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); + if (softpipe->psize_slot > 0) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, + softpipe->psize_slot); + } + + draw_compute_vertex_size(vinfo); + } + + return vinfo; +} + + +/** + * Called from vbuf module. + * + * Note that there's actually two different vertex layouts in softpipe. + * + * The normal one is computed in softpipe_get_vertex_info() above and is + * used by the point/line/tri "setup" code. + * + * The other one (this one) is only used by the vbuf module (which is + * not normally used by default but used in testing). For the vbuf module, + * we basically want to pass-through the draw module's vertex layout as-is. + * When the softpipe vbuf code begins drawing, the normal vertex layout + * will come into play again. + */ +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) +{ + (void) softpipe_get_vertex_info(softpipe); + return &softpipe->vertex_info_vbuf; +} + + +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. 
+ */ +static void +compute_cliprect(struct softpipe_context *sp) +{ + uint surfWidth = sp->framebuffer.width; + uint surfHeight = sp->framebuffer.height; + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void softpipe_update_derived( struct softpipe_context *softpipe ) +{ + if (softpipe->dirty & (SP_NEW_RASTERIZER | + SP_NEW_FS | + SP_NEW_VS)) + invalidate_vertex_layout( softpipe ); + + if (softpipe->dirty & (SP_NEW_SCISSOR | + SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_FRAMEBUFFER)) + compute_cliprect(softpipe); + + if (softpipe->dirty & (SP_NEW_BLEND | + SP_NEW_DEPTH_STENCIL_ALPHA | + SP_NEW_FRAMEBUFFER | + SP_NEW_RASTERIZER | + SP_NEW_FS | + SP_NEW_QUERY)) + sp_build_quad_pipeline(softpipe); + + softpipe->dirty = 0; +} diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c new file mode 100644 index 0000000000..e5b609cf6c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
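A note on the SP_NEW_* flow visible in softpipe_update_derived() above: the state setters in these files do almost no work themselves, they mostly just OR one SP_NEW_* bit into softpipe->dirty, and the derived state (vertex layout, cliprect, quad pipeline) is rebuilt at most once per draw, when setup_prepare() sees a non-zero dirty mask. A compressed model of the pattern with hypothetical demo_* names standing in for the real context:

/* Minimal model of the dirty-flag scheme used by softpipe. */
enum { DEMO_NEW_SCISSOR = 0x1, DEMO_NEW_FRAMEBUFFER = 0x2 };

struct demo_ctx {
   unsigned dirty;
   int cliprect_valid;   /* stands in for the real derived state */
};

static void demo_set_scissor(struct demo_ctx *ctx)
{
   ctx->dirty |= DEMO_NEW_SCISSOR;   /* cheap: just record that something changed */
}

static void demo_update_derived(struct demo_ctx *ctx)
{
   /* expensive work happens here, and only for the groups that changed */
   if (ctx->dirty & (DEMO_NEW_SCISSOR | DEMO_NEW_FRAMEBUFFER))
      ctx->cliprect_valid = 1;       /* e.g. recompute the cliprect */
   ctx->dirty = 0;
}

static void demo_draw(struct demo_ctx *ctx)
{
   if (ctx->dirty)
      demo_update_derived(ctx);      /* same check setup_prepare() does */
   /* ... rasterize ... */
}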
+ * + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" + + +void * +softpipe_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader *state; + + /* debug */ + if (softpipe->dump_fs) + tgsi_dump(templ->tokens, 0); + + /* codegen */ + state = softpipe_create_fs_llvm( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_sse( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_exec( softpipe, templ ); + } + } + + assert(state); + + /* get/save the summary info for this shader */ + tgsi_scan_shader(templ->tokens, &state->info); + + return state; +} + + +void +softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->fs = (struct sp_fragment_shader *) fs; + + softpipe->dirty |= SP_NEW_FS; +} + + +void +softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct sp_fragment_shader *state = fs; + + assert(fs != softpipe_context(pipe)->fs); + + state->delete( state ); +} + + +void * +softpipe_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_vertex_shader *state; + + state = CALLOC_STRUCT(sp_vertex_shader); + if (state == NULL ) { + return NULL; + } + + state->draw_data = draw_create_vertex_shader(softpipe->draw, templ); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->vs = (const struct sp_vertex_shader *)vs; + + draw_bind_vertex_shader(softpipe->draw, + (softpipe->vs ? softpipe->vs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_VS; +} + + +void +softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_vertex_shader *state = + (struct sp_vertex_shader *)vs; + + draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( state ); +} + + + +void +softpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + winsys_buffer_reference(ws, + &softpipe->constants[shader].buffer, + buf ? buf->buffer : NULL); + softpipe->constants[shader].size = buf ? buf->size : 0; + + softpipe->dirty |= SP_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c new file mode 100644 index 0000000000..87b7219683 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_state.h" +#include "draw/draw_context.h" + + + +void * +softpipe_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + return mem_dup(rast, sizeof(*rast)); +} + +void softpipe_bind_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(softpipe->draw, setup); + + softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + + softpipe->dirty |= SP_NEW_RASTERIZER; +} + +void softpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) +{ + FREE( rasterizer ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c new file mode 100644 index 0000000000..99a28c0d7e --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "sp_context.h" +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "draw/draw_context.h" + + + +void * +softpipe_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + + +void +softpipe_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_samplers && + !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num; ++i) + softpipe->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + softpipe->sampler[i] = NULL; + + softpipe->num_samplers = num; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void +softpipe_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_textures && + !memcmp(softpipe->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference(&softpipe->texture[i], tex); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + } + + softpipe->num_textures = num; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c new file mode 100644 index 0000000000..b5376e522d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -0,0 +1,130 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ +#include "pipe/p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + +#include "draw/draw_context.h" + + +/** + * XXX this might get moved someday + * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. + * Here, we flush the old surfaces and update the tile cache to point to the new + * surfaces. + */ +void +softpipe_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct softpipe_context *sp = softpipe_context(pipe); + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + /* check if changing cbuf */ + if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { + /* flush old */ + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + + /* assign new */ + sp->framebuffer.cbufs[i] = fb->cbufs[i]; + + /* update cache */ + sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); + } + } + + sp->framebuffer.num_cbufs = fb->num_cbufs; + + /* zbuf changing? */ + if (sp->framebuffer.zsbuf != fb->zsbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + /* assign new */ + sp->framebuffer.zsbuf = fb->zsbuf; + + /* update cache */ + sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); + } + +#if 0 + /* XXX combined depth/stencil here */ + + /* sbuf changing? */ + if (sp->framebuffer.sbuf != fb->sbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->sbuf_cache_sep); + + /* assign new */ + sp->framebuffer.sbuf = fb->sbuf; + + /* update cache */ + if (fb->sbuf != fb->zbuf) { + /* separate stencil buf */ + sp->sbuf_cache = sp->sbuf_cache_sep; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + else { + /* combined depth/stencil */ + sp->sbuf_cache = sp->zbuf_cache; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + } +#endif + + /* Tell draw module how deep the Z/depth buffer is */ + { + int depth_bits; + double mrd; + if (sp->framebuffer.zsbuf) { + depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format, + PIPE_FORMAT_COMP_Z); + } + else { + depth_bits = 0; + } + if (depth_bits > 16) { + mrd = 0.0000001; + } + else { + mrd = 0.00002; + } + draw_set_mrd(sp->draw, mrd); + } + + sp->framebuffer.width = fb->width; + sp->framebuffer.height = fb->height; + + sp->dirty |= SP_NEW_FRAMEBUFFER; +} diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c new file mode 100644 index 0000000000..46b6991195 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
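A note on the minimum-resolvable-depth values chosen in softpipe_set_framebuffer_state() above: they are roughly one depth-buffer LSB padded slightly upward, which is the granularity that polygon-offset style depth nudges need. One LSB of a 24-bit buffer is 2^-24 ~= 6.0e-8 and the code uses 1e-7; one LSB of a 16-bit buffer is 2^-16 ~= 1.5e-5 and the code uses 2e-5. Purely as an illustration (not what the driver does), the same quantity could be derived from the bit count directly:

#include <math.h>

/* One depth-buffer LSB in the [0,1] depth range; 0 when there is no Z buffer.
 * The driver instead picks between two hard-coded constants a little above
 * this value.
 */
static double
demo_depth_lsb(int depth_bits)
{
   return depth_bits > 0 ? ldexp(1.0, -depth_bits) : 0.0;
}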
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" + +#include "draw/draw_context.h" + + +void +softpipe_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_element, attribs, + count * sizeof(struct pipe_vertex_element)); + softpipe->num_vertex_elements = count; + + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_elements(softpipe->draw, count, attribs); +} + + +void +softpipe_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); + softpipe->num_vertex_buffers = count; + + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_buffers(softpipe->draw, count, buffers); +} diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c new file mode 100644 index 0000000000..6ade732698 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_rect.h" +#include "sp_context.h" + + + +void +sp_init_surface_functions(struct softpipe_context *sp) +{ + sp->pipe.surface_copy = util_surface_copy; + sp->pipe.surface_fill = util_surface_fill; +} diff --git a/src/gallium/drivers/softpipe/sp_surface.h b/src/gallium/drivers/softpipe/sp_surface.h new file mode 100644 index 0000000000..22de3ba43f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SURFACE_H +#define SP_SURFACE_H + + +struct softpipe_context; + + +extern void +sp_init_surface_functions(struct softpipe_context *sp); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c new file mode 100644 index 0000000000..32aa5025e4 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -0,0 +1,1229 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling
+ *
+ * Authors:
+ *   Brian Paul
+ */
+
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_surface.h"
+#include "sp_texture.h"
+#include "sp_tex_sample.h"
+#include "sp_tile_cache.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+
+
+/*
+ * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
+ * see 1-pixel bands of improperly weighted linear-filtered textures.
+ * The tests/texwrap.c demo is a good test.
+ * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
+ * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
+ */
+#define FRAC(f)  ((f) - util_ifloor(f))
+
+
+/**
+ * Linear interpolation macro
+ */
+static INLINE float
+lerp(float a, float v0, float v1)
+{
+   return v0 + a * (v1 - v0);
+}
+
+
+/**
+ * Do 2D/bilinear interpolation of float values.
+ * v00, v10, v01 and v11 are typically four texture samples in a square/box.
+ * a and b are the horizontal and vertical interpolants.
+ * It's important that this function is inlined when compiled with
+ * optimization!  If we find that's not true on some systems, convert
+ * to a macro.
+ */
+static INLINE float
+lerp_2d(float a, float b,
+        float v00, float v10, float v01, float v11)
+{
+   const float temp0 = lerp(a, v00, v10);
+   const float temp1 = lerp(a, v01, v11);
+   return lerp(b, temp0, temp1);
+}
+
+
+/**
+ * As above, but 3D interpolation of 8 values.
+ */
+static INLINE float
+lerp_3d(float a, float b, float c,
+        float v000, float v100, float v010, float v110,
+        float v001, float v101, float v011, float v111)
+{
+   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
+   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
+   return lerp(c, temp0, temp1);
+}
+
+
+
+/**
+ * If A is a signed integer, A % B doesn't give the right value for A < 0
+ * (in terms of texture repeat).  Just casting to unsigned fixes that.
+ */
+#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
+
+
+/**
+ * Apply texture coord wrapping mode and return integer texture indexes
+ * for a vector of four texcoords (S or T or P).
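
The FRAC note above matters mostly for negative coordinates: util_ifloor() rounds toward minus infinity, so FRAC stays in [0,1) and repeat-mode filter weights remain continuous across zero. A small standalone check of that behavior, plus the bilinear helper; floorf stands in for util_ifloor purely so the example compiles outside the driver:

#include <math.h>
#include <stdio.h>

/* Same idea as the FRAC macro above, using floorf for portability. */
#define FRAC(f)  ((f) - floorf(f))

static float
lerp(float a, float v0, float v1)
{
   return v0 + a * (v1 - v0);
}

int
main(void)
{
   /* For s = -0.25 the fractional weight is 0.75, not -0.25, so a
    * REPEAT-wrapped linear filter blends the same way on both sides of 0. */
   printf("FRAC(-0.25) = %f\n", FRAC(-0.25f));   /* 0.75 */
   printf("FRAC( 1.75) = %f\n", FRAC(1.75f));    /* 0.75 */

   /* 2D bilinear blend of four texel values with weights a = b = 0.5 */
   float t0 = lerp(0.5f, 0.0f, 1.0f);
   float t1 = lerp(0.5f, 2.0f, 3.0f);
   printf("lerp_2d center = %f\n", lerp(0.5f, t0, t1));   /* 1.5 */
   return 0;
}
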
+ * \param wrapMode PIPE_TEX_WRAP_x + * \param s the incoming texcoords + * \param size the texture image size + * \param icoord returns the integer texcoords + * \return integer texture index + */ +static INLINE void +nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch] * size); + icoord[ch] = REMAINDER(i, size); + } + return; + case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= 0.0F) + icoord[ch] = 0; + else if (s[ch] >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] < min) + icoord[ch] = 0; + else if (s[ch] > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= min) + icoord[ch] = -1; + else if (s[ch] >= max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + } + return; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } + } + return; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + for (ch = 0; ch < 4; ch++) { + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + const float u = fabsf(s[ch]); + if (u <= 0.0F) + icoord[ch] = 0; + else if (u >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } + return; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } + } + return; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = -1; + else if (u > max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(u * size); + } + } + return; + default: + assert(0); + } +} + + +/** + * Used to compute texel locations for linear sampling for four texcoords. 
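
As a concrete instance of the REPEAT case above: with size = 8 and s = 1.25, util_ifloor(1.25 * 8) = 10 and REMAINDER(10, 8) = 2, so the coordinate wraps to texel 2. A tiny standalone sketch of just that branch, with floorf again standing in for util_ifloor:

#include <math.h>
#include <stdio.h>

#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))

/* Nearest-filter REPEAT wrapping for a single coordinate, mirroring the
 * PIPE_TEX_WRAP_REPEAT branch above. */
static int
wrap_repeat_nearest(float s, unsigned size)
{
   int i = (int) floorf(s * size);
   return REMAINDER(i, size);
}

int
main(void)
{
   printf("%d\n", wrap_repeat_nearest(1.25f, 8));    /* 2 */
   printf("%d\n", wrap_repeat_nearest(-0.125f, 8));  /* 7 */
   return 0;
}
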
+ * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoords + * \param size the texture image size + * \param icoord0 returns first texture indexes + * \param icoord1 returns second texture indexes (usually icoord0 + 1) + * \param w returns blend factor/weight between texture indexes + * \param icoord returns the computed integer texture coords + */ +static INLINE void +linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + for (ch = 0; ch < 4; ch++) { + float u = s[ch] * size - 0.5F; + icoord0[ch] = REMAINDER(util_ifloor(u), size); + icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], min, max); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + } + break;; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + u = u * size - 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + } + break;; + default: + assert(0); + } +} + + +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. 
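
Linear filtering needs two texel indexes plus a blend weight. For the REPEAT case above with size = 8 and s = 0.5: u = 0.5 * 8 - 0.5 = 3.5, so icoord0 = 3, icoord1 = 4 and w = 0.5, i.e. texels 3 and 4 are averaged equally. A minimal standalone version of that branch, assuming only standard C:

#include <math.h>
#include <stdio.h>

#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
#define FRAC(f)         ((f) - floorf(f))

/* One-coordinate version of the PIPE_TEX_WRAP_REPEAT branch above. */
static void
wrap_repeat_linear(float s, unsigned size, int *i0, int *i1, float *w)
{
   float u = s * size - 0.5f;
   *i0 = REMAINDER((int) floorf(u), size);
   *i1 = REMAINDER(*i0 + 1, size);
   *w  = FRAC(u);
}

int
main(void)
{
   int i0, i1;
   float w;
   wrap_repeat_linear(0.5f, 8, &i0, &i1, &w);
   printf("%d %d %f\n", i0, i1, w);   /* 3 4 0.5 */
   return 0;
}
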
+ */ +static INLINE void +nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch]); + icoord[ch]= CLAMP(i, 0, (int) size-1); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); + } + return; + default: + assert(0); + } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. + */ +static INLINE void +linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + for (ch = 0; ch < 4; ch++) { + /* Not exactly what the spec says, but it matches NVIDIA output */ + float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + return; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord1[ch] > (int) size - 1) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break; + default: + assert(0); + } +} + + +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = ( sc / ma + 1.0F ) * 0.5F; + *newT = ( tc / ma + 1.0F ) * 0.5F; + + return face; +} + + +/** + * Examine the quad's texture coordinates to compute the partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + * + * This is only done for fragment shaders, not vertex shaders. 
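
The cube-face table above picks the face from the direction component with the largest magnitude and projects the other two onto [0,1]. For example, (rx, ry, rz) = (0.2, 0.9, -0.1) has +ry as the major axis, so the face is POS_Y with sc = rx and tc = rz. A standalone sketch of just that arithmetic:

#include <math.h>
#include <stdio.h>

int
main(void)
{
   /* Direction whose largest component is +Y: face POS_Y, sc = rx, tc = rz */
   const float rx = 0.2f, ry = 0.9f, rz = -0.1f;
   const float ma = fabsf(ry);
   const float s = (rx / ma + 1.0f) * 0.5f;   /* ~0.611 */
   const float t = (rz / ma + 1.0f) * 0.5f;   /* ~0.444 */
   printf("face POS_Y  s=%f t=%f\n", s, t);
   return 0;
}
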
+ */ +static float +compute_lambda(const struct pipe_texture *tex, + const struct pipe_sampler_state *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + float rho, lambda; + + assert(sampler->normalized_coords); + + assert(s); + { + float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; + float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); + rho = MAX2(dsdx, dsdy) * tex->width[0]; + } + if (t) { + float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; + float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; + float max; + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); + max = MAX2(dtdx, dtdy) * tex->height[0]; + rho = MAX2(rho, max); + } + if (p) { + float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; + float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; + float max; + dpdx = fabsf(dpdx); + dpdy = fabsf(dpdy); + max = MAX2(dpdx, dpdy) * tex->depth[0]; + rho = MAX2(rho, max); + } + + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + + return lambda; +} + + +/** + * Do several things here: + * 1. Compute lambda from the texcoords, if needed + * 2. Determine if we're minifying or magnifying + * 3. If minifying, choose mipmap levels + * 4. Return image filter to use within mipmap images + * \param level0 Returns first mipmap level to sample from + * \param level1 Returns second mipmap level to sample from + * \param levelBlend Returns blend factor between levels, in [0,1] + * \param imgFilter Returns either the min or mag filter, depending on lambda + */ +static void +choose_mipmap_levels(const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + unsigned *level0, unsigned *level1, float *levelBlend, + unsigned *imgFilter) +{ + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* no mipmap selection needed */ + *level0 = *level1 = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); + + if (sampler->min_img_filter != sampler->mag_img_filter) { + /* non-mipmapped texture, but still need to determine if doing + * minification or magnification. 
+ */ + float lambda = compute_lambda(texture, sampler, s, t, p, lodbias); + if (lambda <= 0.0) { + *imgFilter = sampler->mag_img_filter; + } + else { + *imgFilter = sampler->min_img_filter; + } + } + else { + *imgFilter = sampler->mag_img_filter; + } + } + else { + float lambda; + + if (computeLambda) + /* fragment shader */ + lambda = compute_lambda(texture, sampler, s, t, p, lodbias); + else + /* vertex shader */ + lambda = lodbias; /* not really a bias, but absolute LOD */ + + if (lambda <= 0.0) { /* XXX threshold depends on the filter */ + /* magnifying */ + *imgFilter = sampler->mag_img_filter; + *level0 = *level1 = 0; + } + else { + /* minifying */ + *imgFilter = sampler->min_img_filter; + + /* choose mipmap level(s) and compute the blend factor between them */ + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + /* Nearest mipmap level */ + const int lvl = (int) (lambda + 0.5); + *level0 = + *level1 = CLAMP(lvl, 0, (int) texture->last_level); + } + else { + /* Linear interpolation between mipmap levels */ + const int lvl = (int) lambda; + *level0 = CLAMP(lvl, 0, (int) texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); + *levelBlend = FRAC(lambda); /* blending weight between levels */ + } + } + } +} + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param face the cube face in 0..5 + * \param level the mipmap level + * \param x the x coord of texel within 2D image + * \param y the y coord of texel within 2D image + * \param z which slice of a 3D texture + * \param rgba the quad to put the texel/color into + * \param j which element of the rgba quad to write to + * + * XXX maybe move this into sp_tile_cache.c and merge with the + * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... + */ +static void +get_texel(const struct tgsi_sampler *tgsi_sampler, + unsigned face, unsigned level, int x, int y, int z, + float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level] || + z < 0 || z >= (int) texture->depth[level]) { + rgba[0][j] = sampler->border_color[0]; + rgba[1][j] = sampler->border_color[1]; + rgba[2][j] = sampler->border_color[2]; + rgba[3][j] = sampler->border_color[3]; + } + else { + const int tx = x % TILE_SIZE; + const int ty = y % TILE_SIZE; + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(sp, samp->cache, + x, y, z, face, level); + rgba[0][j] = tile->data.color[ty][tx][0]; + rgba[1][j] = tile->data.color[ty][tx][1]; + rgba[2][j] = tile->data.color[ty][tx][2]; + rgba[3][j] = tile->data.color[ty][tx][3]; + if (0) + { + debug_printf("Get texel %f %f %f %f from %s\n", + rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], + pf_name(texture->format)); + } + } +} + + +/** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. 
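
Putting compute_lambda() and choose_mipmap_levels() together with concrete numbers: if a 256-texel-wide texture is minified so that s changes by 4/256 per pixel, rho = 4 and lambda = log2(4) = 2, so with PIPE_TEX_MIPFILTER_LINEAR the sampler reads levels 2 and 3 with levelBlend = 0. A standalone sketch of that arithmetic; log2f stands in for util_fast_log2 and the clamp to last_level is omitted:

#include <math.h>
#include <stdio.h>

int
main(void)
{
   const unsigned width = 256;        /* level-0 width of the texture */
   const float dsdx = 4.0f / 256.0f;  /* s changes by 4 texels per pixel */

   const float rho = dsdx * width;    /* 4.0: texels covered per pixel */
   const float lambda = log2f(rho);   /* 2.0: level of detail */

   /* PIPE_TEX_MIPFILTER_LINEAR: blend between floor(lambda) and the next
    * level, weighted by the fractional part of lambda. */
   const int level0 = (int) lambda;              /* 2 */
   const int level1 = level0 + 1;                /* 3 */
   const float blend = lambda - (float) level0;  /* 0.0 */

   printf("lambda=%f  levels %d/%d  blend=%f\n", lambda, level0, level1, blend);
   return 0;
}
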
+ */ +static INLINE void +shadow_compare(uint compare_func, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const float p[QUAD_SIZE], + uint j) +{ + int k; + switch (compare_func) { + case PIPE_FUNC_LESS: + k = p[j] < rgba[0][j]; + break; + case PIPE_FUNC_LEQUAL: + k = p[j] <= rgba[0][j]; + break; + case PIPE_FUNC_GREATER: + k = p[j] > rgba[0][j]; + break; + case PIPE_FUNC_GEQUAL: + k = p[j] >= rgba[0][j]; + break; + case PIPE_FUNC_EQUAL: + k = p[j] == rgba[0][j]; + break; + case PIPE_FUNC_NOTEQUAL: + k = p[j] != rgba[0][j]; + break; + case PIPE_FUNC_ALWAYS: + k = 1; + break; + case PIPE_FUNC_NEVER: + k = 0; + break; + default: + k = 0; + assert(0); + break; + } + + rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; +} + + +/** + * Common code for sampling 1D/2D/cube textures. + * Could probably extend for 3D... + */ +static void +sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const unsigned faces[4]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const uint compare_func = sampler->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->normalized_coords); + + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, + rgba2, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], 
yw[j], + tx[c][0], tx[c][1], + tx[c][2], tx[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0[j] /= 2; + y0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + default: + assert(0); + } +} + + +static INLINE void +sp_get_samples_1d(const struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + static const float tzero[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, tzero, NULL, + computeLambda, lodbias, rgba, faces); +} + + +static INLINE void +sp_get_samples_2d(const struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, t, p, + computeLambda, lodbias, rgba, faces); +} + + +static INLINE void +sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + /* get/map pipe_surfaces corresponding to 3D tex slices */ + unsigned level0, level1, j, imgFilter; + int width, height, depth; + float levelBlend; + const uint face = 0; + + choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->normalized_coords); + + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4], z[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + nearest_texcoord_4(sampler->wrap_r, p, depth, z); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + z[j] /= 2; + get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); + } + 
} + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; + float xw[4], yw[4], zw[4]; /* interpolation weights */ + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + float tx0[4][4], tx1[4][4]; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0[j] /= 2; + y0[j] /= 2; + z0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + z1[j] /= 2; + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_cube(const struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + unsigned faces[QUAD_SIZE], j; + float ssss[4], tttt[4]; + for (j = 0; j < QUAD_SIZE; j++) { + faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + } + sp_get_samples_2d_common(sampler, ssss, tttt, NULL, + computeLambda, lodbias, rgba, faces); +} + + +static void +sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const uint face = 0; + const uint compare_func = sampler->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + 
choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + /* texture RECTS cannot be mipmapped */ + assert(level0 == level1); + + width = texture->width[level0]; + height = texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + { + int x[4], y[4]; + nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); + nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Common code for vertex/fragment program texture sampling. + */ +static INLINE void +sp_get_samples(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + boolean computeLambda, + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + + if (!texture) + return; + + switch (texture->target) { + case PIPE_TEXTURE_1D: + assert(sampler->normalized_coords); + sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + break; + case PIPE_TEXTURE_2D: + if (sampler->normalized_coords) + sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + else + sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + break; + case PIPE_TEXTURE_3D: + assert(sampler->normalized_coords); + sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + break; + case PIPE_TEXTURE_CUBE: + assert(sampler->normalized_coords); + sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); + break; + default: + assert(0); + } + +#if 0 /* DEBUG */ + { + int i; + printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); + for (i = 0; i < 4; i++) { + printf("Frag %d: %f %f %f %f\n", i, + rgba[0][i], + rgba[1][i], + rgba[2][i], + rgba[3][i]); + } + } +#endif +} + + +/** + * Called via tgsi_sampler::get_samples() when running a fragment shader. + * Get four filtered RGBA values from the sampler's texture. 
+ */ +void +sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); +} + + +/** + * Called via tgsi_sampler::get_samples() when running a vertex shader. + * Get four filtered RGBA values from the sampler's texture. + */ +void +sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba); +} diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h new file mode 100644 index 0000000000..40d8eb2c2a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef SP_TEX_SAMPLE_H +#define SP_TEX_SAMPLE_H + + +#include "tgsi/tgsi_exec.h" + + +/** + * Subclass of tgsi_sampler + */ +struct sp_shader_sampler +{ + struct tgsi_sampler base; /**< base class */ + + uint unit; + struct softpipe_context *sp; + struct softpipe_tile_cache *cache; +}; + + + +static INLINE const struct sp_shader_sampler * +sp_shader_sampler(const struct tgsi_sampler *sampler) +{ + return (const struct sp_shader_sampler *) sampler; +} + + +extern void +sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + +extern void +sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +#endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c new file mode 100644 index 0000000000..a64dc89f43 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -0,0 +1,370 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "sp_screen.h" + + +/* Simple, maximally packed layout. 
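
sp_shader_sampler above follows the usual Gallium "embed the base struct as the first member" subclassing idiom; the pointer cast in sp_shader_sampler() is only valid because base sits at offset zero. A generic, driver-independent illustration of the same idiom, with all names invented for the example:

#include <stdio.h>

/* Hypothetical base/derived pair showing the embed-the-base-first idiom. */
struct base_sampler {
   int kind;
};

struct my_sampler {
   struct base_sampler base;   /* must be the first member */
   unsigned unit;
};

static struct my_sampler *
my_sampler(struct base_sampler *b)
{
   /* Safe only because 'base' is the first member of struct my_sampler. */
   return (struct my_sampler *) b;
}

int
main(void)
{
   struct my_sampler s = { { 1 }, 3 };
   struct base_sampler *b = &s.base;
   printf("unit = %u\n", my_sampler(b)->unit);   /* 3 */
   return 0;
}
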
+ */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +/* Conventional allocation path for non-display textures: + */ +static boolean +softpipe_texture_layout(struct pipe_screen *screen, + struct softpipe_texture * spt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + unsigned buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + spt->stride[level] = pt->nblocksx[level]*pt->block.size; + + spt->level_offset[level] = buffer_size; + + buffer_size += (pt->nblocksy[level] * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + spt->stride[level]); + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + buffer_size); + + return spt->buffer != NULL; +} + +/* Hack it up to use the old winsys->surface_alloc_storage() + * method for now: + */ +static boolean +softpipe_displaytarget_layout(struct pipe_screen *screen, + struct softpipe_texture * spt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface surf; + unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + int ret; + + + memset(&surf, 0, sizeof(surf)); + + ret =ws->surface_alloc_storage( ws, + &surf, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + flags, + spt->base.tex_usage); + if(ret != 0) + return FALSE; + + if (!surf.buffer) { + /* allocation failed */ + return FALSE; + } + + /* Now extract the goodies: + */ + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->stride[0] = surf.stride; + + /* Transfer the reference: + */ + spt->buffer = surf.buffer; + surf.buffer = NULL; + + return spt->buffer != NULL; +} + + + + + +static struct pipe_texture * +softpipe_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; + + if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (!softpipe_displaytarget_layout(screen, spt)) + goto fail; + } + else { + if (!softpipe_texture_layout(screen, spt)) + goto fail; + } + + assert(spt->base.refcount == 1); + return &spt->base; + + fail: + FREE(spt); + return NULL; +} + + +static struct pipe_texture * +softpipe_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct softpipe_texture *spt; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *base; + spt->base.refcount = 1; + spt->base.screen = screen; + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->stride[0] = 
stride[0]; + + pipe_buffer_reference(screen, &spt->buffer, buffer); + + return &spt->base; +} + + +static void +softpipe_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + pipe_buffer_reference(screen, &spt->buffer, NULL); + FREE(spt); + } + *pt = NULL; +} + + +static struct pipe_surface * +softpipe_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = CALLOC_STRUCT(pipe_surface); + ps->refcount = 1; + if (ps) { + assert(ps->refcount); + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(screen, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = spt->stride[level]; + ps->offset = spt->level_offset[level]; + ps->usage = usage; + + /* Because we are softpipe, anything that the state tracker + * thought was going to be done with the GPU will actually get + * done with the CPU. Let's adjust the flags to take that into + * account. + */ + if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) { + /* GPU_WRITE means "render" and that can involve reads (blending) */ + ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ; + } + + if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) + ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* Mark the surface as dirty. The tile cache will look for this. */ + spt->modified = TRUE; + } + + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + ps->nblocksy * + ps->stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + +static void +softpipe_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. + */ + assert ((*s)->texture); + if (--surf->refcount == 0) { + pipe_texture_reference(&surf->texture, NULL); + pipe_buffer_reference(screen, &surf->buffer, NULL); + FREE(surf); + } + *s = NULL; +} + + +static void * +softpipe_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = pipe_buffer_map( screen, surface->buffer, flags ); + if (map == NULL) + return NULL; + + /* May want to different things here depending on read/write nature + * of the map: + */ + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify sharing contexts of a texture change. + * In softpipe, that would mean flushing the texture cache. 
+ */ + softpipe_screen(screen)->timestamp++; + } + + return map + surface->offset; +} + + +static void +softpipe_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + pipe_buffer_unmap( screen, surface->buffer ); +} + + +void +softpipe_init_texture_funcs(struct softpipe_context *sp) +{ +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create; + screen->texture_blanket = softpipe_texture_blanket; + screen->texture_release = softpipe_texture_release; + + screen->get_tex_surface = softpipe_get_tex_surface; + screen->tex_surface_release = softpipe_tex_surface_release; + + screen->surface_map = softpipe_surface_map; + screen->surface_unmap = softpipe_surface_unmap; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h new file mode 100644 index 0000000000..bf437a7c61 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TEXTURE_H +#define SP_TEXTURE_H + + +#include "pipe/p_state.h" + + +struct pipe_context; +struct pipe_screen; +struct softpipe_context; + + +struct softpipe_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + + boolean modified; +}; + + +/** cast wrapper */ +static INLINE struct softpipe_texture * +softpipe_texture(struct pipe_texture *pt) +{ + return (struct softpipe_texture *) pt; +} + + +extern void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ); + +extern void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c new file mode 100644 index 0000000000..78b0efa46d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -0,0 +1,614 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
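
The level_offset[] and stride[] arrays declared in softpipe_texture above are filled in by softpipe_texture_layout(): each level's stride is nblocksx * block.size, its offset is the running buffer size, and width/height/depth are halved (never below 1) per level. A standalone sketch of that accounting for a hypothetical 8x8 RGBA8 2D texture (4 bytes per pixel, one block per pixel) with a full mip chain:

#include <stdio.h>

static unsigned minify(unsigned d) { return d > 1 ? d >> 1 : 1; }

int
main(void)
{
   const unsigned block_size = 4;   /* bytes per pixel, e.g. RGBA8 */
   unsigned width = 8, height = 8;
   unsigned level, offset = 0;

   for (level = 0; level <= 3; level++) {           /* 8, 4, 2, 1 */
      const unsigned stride = width * block_size;   /* nblocksx * block.size */
      printf("level %u: %ux%u stride=%u offset=%u\n",
             level, width, height, stride, offset);
      offset += stride * height;                    /* depth = 1, non-cube */
      width = minify(width);
      height = minify(height);
   }
   printf("total buffer size = %u bytes\n", offset);   /* 340 */
   return 0;
}
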
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Framebuffer/surface tile caching.
+ *
+ * Author:
+ *    Brian Paul
+ */
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_tile.h"
+#include "sp_context.h"
+#include "sp_surface.h"
+#include "sp_texture.h"
+#include "sp_tile_cache.h"
+
+#define NUM_ENTRIES 32
+
+
+/** XXX move these */
+#define MAX_WIDTH 2048
+#define MAX_HEIGHT 2048
+
+
+struct softpipe_tile_cache
+{
+   struct pipe_screen *screen;
+   struct pipe_surface *surface;  /**< the surface we're caching */
+   void *surface_map;
+   struct pipe_texture *texture;  /**< if caching a texture */
+   struct softpipe_cached_tile entries[NUM_ENTRIES];
+   uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32];
+   float clear_color[4];
+   uint clear_val;
+   boolean depth_stencil;  /** Is the surface a depth/stencil format? */
+
+   struct pipe_surface *tex_surf;
+   void *tex_surf_map;
+   int tex_face, tex_level, tex_z;
+
+   struct softpipe_cached_tile tile;  /**< scratch tile for clears */
+};
+
+
+/**
+ * Return the position in the cache for the tile that contains win pos (x,y).
+ * We currently use a direct mapped cache so this is like a hash key.
+ * At some point we should investigate something more sophisticated, like
+ * an LRU replacement policy.
+ */
+#define CACHE_POS(x, y) \
+   (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES)
+
+
+
+/**
+ * Is the tile at (x,y) in cleared state?
+ */
+static INLINE uint
+is_clear_flag_set(const uint *bitvec, int x, int y)
+{
+   int pos, bit;
+   x /= TILE_SIZE;
+   y /= TILE_SIZE;
+   pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+   assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
+   bit = bitvec[pos / 32] & (1 << (pos & 31));
+   return bit;
+}
+
+
+/**
+ * Mark the tile at (x,y) as not cleared.
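
CACHE_POS maps a window position to one of the NUM_ENTRIES direct-mapped slots, while the clear_flags bit vector records one bit per tile of the whole (MAX_WIDTH x MAX_HEIGHT) surface. A standalone sketch of both index computations; TILE_SIZE is taken as 64 purely for illustration since its real value lives in sp_tile_cache.h, which is not shown here:

#include <stdio.h>

/* Illustrative values: TILE_SIZE is an assumption, the others match above. */
#define TILE_SIZE   64
#define MAX_WIDTH   2048
#define NUM_ENTRIES 32

#define CACHE_POS(x, y) \
   (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES)

int
main(void)
{
   const int x = 200, y = 70;

   /* Direct-mapped cache slot for the tile containing (200, 70) */
   printf("cache slot = %d\n", CACHE_POS(x, y));   /* (3 + 1*5) % 32 = 8 */

   /* Word/bit position of the same tile in the clear_flags bit vector */
   const int pos = (y / TILE_SIZE) * (MAX_WIDTH / TILE_SIZE) + x / TILE_SIZE;
   printf("clear flag: word %d, bit %d\n", pos / 32, pos & 31);   /* word 1, bit 3 */
   return 0;
}
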
+ */ +static INLINE void +clear_clear_flag(uint *bitvec, int x, int y) +{ + int pos; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bitvec[pos / 32] &= ~(1 << (pos & 31)); +} + + +struct softpipe_tile_cache * +sp_create_tile_cache( struct pipe_screen *screen ) +{ + struct softpipe_tile_cache *tc; + uint pos; + + tc = CALLOC_STRUCT( softpipe_tile_cache ); + if (tc) { + tc->screen = screen; + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = + tc->entries[pos].y = -1; + } + } + return tc; +} + + +void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc) +{ + uint pos; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + /*assert(tc->entries[pos].x < 0);*/ + } + if (tc->surface) { + pipe_surface_reference(&tc->surface, NULL); + } + if (tc->tex_surf) { + pipe_surface_reference(&tc->tex_surf, NULL); + } + + FREE( tc ); +} + + +/** + * Specify the surface to cache. + */ +void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *ps) +{ + assert(!tc->texture); + + if (tc->surface_map) { + tc->screen->surface_unmap(tc->screen, tc->surface); + tc->surface_map = NULL; + } + + pipe_surface_reference(&tc->surface, ps); + + if (tc->surface) { + if (tc->surface_map) /* XXX: this is always NULL!? */ + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + + tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_X8Z24_UNORM || + ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z24X8_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM || + ps->format == PIPE_FORMAT_Z32_UNORM || + ps->format == PIPE_FORMAT_S8_UNORM); + } +} + + +/** + * Return the surface being cached. + */ +struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) +{ + return tc->surface; +} + + +void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface && !tc->surface_map) + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ); + + if (tc->tex_surf && !tc->tex_surf_map) + tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, + PIPE_BUFFER_USAGE_CPU_READ); +} + + +void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface_map) { + tc->screen->surface_unmap(tc->screen, tc->surface); + tc->surface_map = NULL; + } + + if (tc->tex_surf_map) { + tc->screen->surface_unmap(tc->screen, tc->tex_surf); + tc->tex_surf_map = NULL; + } +} + + +/** + * Specify the texture to cache. + */ +void +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, + struct pipe_texture *texture) +{ + uint i; + + assert(!tc->surface); + + pipe_texture_reference(&tc->texture, texture); + + if (tc->tex_surf_map) { + tc->screen->surface_unmap(tc->screen, tc->tex_surf); + tc->tex_surf_map = NULL; + } + pipe_surface_reference(&tc->tex_surf, NULL); + + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].x = -1; + } + + tc->tex_face = -1; /* any invalid value here */ +} + + +/** + * Set pixels in a tile to the given clear color/value, float. 
+ */ +static void +clear_tile_rgba(struct softpipe_cached_tile *tile, + enum pipe_format format, + const float clear_value[4]) +{ + if (clear_value[0] == 0.0 && + clear_value[1] == 0.0 && + clear_value[2] == 0.0 && + clear_value[3] == 0.0) { + memset(tile->data.color, 0, sizeof(tile->data.color)); + } + else { + uint i, j; + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color[i][j][0] = clear_value[0]; + tile->data.color[i][j][1] = clear_value[1]; + tile->data.color[i][j][2] = clear_value[2]; + tile->data.color[i][j][3] = clear_value[3]; + } + } + } +} + + +/** + * Set a tile to a solid value/color. + */ +static void +clear_tile(struct softpipe_cached_tile *tile, + enum pipe_format format, + uint clear_value) +{ + uint i, j; + + switch (pf_get_size(format)) { + case 1: + memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); + break; + case 2: + if (clear_value == 0) { + memset(tile->data.any, 0, 2 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.depth16[i][j] = (ushort) clear_value; + } + } + } + break; + case 4: + if (clear_value == 0) { + memset(tile->data.any, 0, 4 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color32[i][j] = clear_value; + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Actually clear the tiles which were flagged as being in a clear state. + */ +static void +sp_tile_cache_flush_clear(struct pipe_context *pipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_surface *ps = tc->surface; + const uint w = tc->surface->width; + const uint h = tc->surface->height; + uint x, y; + uint numCleared = 0; + + /* clear the scratch tile to the clear value */ + clear_tile(&tc->tile, ps->format, tc->clear_val); + + /* push the tile to all positions marked as clear */ + for (y = 0; y < h; y += TILE_SIZE) { + for (x = 0; x < w; x += TILE_SIZE) { + if (is_clear_flag_set(tc->clear_flags, x, y)) { + pipe_put_tile_raw(ps, + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.color32, 0/*STRIDE*/); + + /* do this? */ + clear_clear_flag(tc->clear_flags, x, y); + + numCleared++; + } + } + } +#if 0 + debug_printf("num cleared: %u\n", numCleared); +#endif +} + + +/** + * Flush the tile cache: write all dirty tiles back to the surface. + * any tiles "flagged" as cleared will be "really" cleared. + */ +void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_surface *ps = tc->surface; + int inuse = 0, pos; + + if (ps && ps->buffer) { + /* caching a drawing surface */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + if (tile->x >= 0) { + if (tc->depth_stencil) { + pipe_put_tile_raw(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + tile->x = tile->y = -1; /* mark as empty */ + inuse++; + } + } + +#if TILE_CLEAR_OPTIMIZATION + sp_tile_cache_flush_clear(&softpipe->pipe, tc); +#endif + } + else if (tc->texture) { + /* caching a texture, mark all entries as empty */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = -1; + } + tc->tex_face = -1; + } + +#if 0 + debug_printf("flushed tiles in use: %d\n", inuse); +#endif +} + + +/** + * Get a tile from the cache. 
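+ * On a cache miss the tile currently occupying the slot is written back to
+ * the surface; the requested tile is then read in, or cleared in place if
+ * its clear flag is set.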
+ * \param x, y position of tile, in pixels + */ +struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y) +{ + struct pipe_surface *ps = tc->surface; + + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + + /* cache pos/entry: */ + const int pos = CACHE_POS(x, y); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tile_x != tile->x || + tile_y != tile->y) { + + if (tile->x != -1) { + /* put dirty tile back in framebuffer */ + if (tc->depth_stencil) { + pipe_put_tile_raw(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + + tile->x = tile_x; + tile->y = tile_y; + + if (is_clear_flag_set(tc->clear_flags, x, y)) { + /* don't get tile from framebuffer, just clear it */ + if (tc->depth_stencil) { + clear_tile(tile, ps->format, tc->clear_val); + } + else { + clear_tile_rgba(tile, ps->format, tc->clear_color); + } + clear_clear_flag(tc->clear_flags, x, y); + } + else { + /* get new tile data from surface */ + if (tc->depth_stencil) { + pipe_get_tile_raw(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_get_tile_rgba(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + } + + return tile; +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos(int x, int y, int z, int face, int level) +{ + uint entry = x + y * 5 + z * 4 + face + level; + return entry % NUM_ENTRIES; +} + + +/** + * Similar to sp_get_cached_tile() but for textures. + * Tiles are read-only and indexed with more params. 
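+ * A miss may also switch the cached surface view: the requested
+ * face/level/zslice is fetched with get_tex_surface() and mapped before
+ * the tile is read.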
+ */ +const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct softpipe_context *sp, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level) +{ + struct pipe_screen *screen = sp->pipe.screen; + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + /* cache pos/entry: */ + const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, + face, level); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->modified) { + /* texture was modified, force a cache reload */ + tile->x = -1; + spt->modified = FALSE; + } + } + + if (tile_x != tile->x || + tile_y != tile->y || + z != tile->z || + face != tile->face || + level != tile->level) { + /* cache miss */ + + /* check if we need to get a new surface */ + if (!tc->tex_surf || + tc->tex_face != face || + tc->tex_level != level || + tc->tex_z != z) { + /* get new surface (view into texture) */ + + if (tc->tex_surf_map) + tc->screen->surface_unmap(tc->screen, tc->tex_surf); + + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z, + PIPE_BUFFER_USAGE_CPU_READ); + tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, + PIPE_BUFFER_USAGE_CPU_READ); + + tc->tex_face = face; + tc->tex_level = level; + tc->tex_z = z; + } + + /* get tile from the surface (view into texture) */ + pipe_get_tile_rgba(tc->tex_surf, + tile_x, tile_y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + tile->x = tile_x; + tile->y = tile_y; + tile->z = z; + tile->face = face; + tile->level = level; + } + + return tile; +} + + +/** + * When a whole surface is being cleared to a value we can avoid + * fetching tiles above. + * Save the color and set a 'clearflag' for each tile of the screen. + */ +void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue) +{ + uint r, g, b, a; + uint pos; + + tc->clear_val = clearValue; + + switch (tc->surface->format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + r = (clearValue >> 24) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 8) & 0xff; + a = (clearValue ) & 0xff; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = (clearValue >> 16) & 0xff; + g = (clearValue >> 8) & 0xff; + b = (clearValue ) & 0xff; + a = (clearValue >> 24) & 0xff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + r = (clearValue >> 8) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 24) & 0xff; + a = (clearValue ) & 0xff; + break; + default: + r = g = b = a = 0; + } + + tc->clear_color[0] = r / 255.0f; + tc->clear_color[1] = g / 255.0f; + tc->clear_color[2] = b / 255.0f; + tc->clear_color[3] = a / 255.0f; + +#if TILE_CLEAR_OPTIMIZATION + /* set flags to indicate all the tiles are cleared */ + memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#else + /* disable the optimization */ + memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); +#endif + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + tile->x = tile->y = -1; + } +} diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h new file mode 100644 index 0000000000..a66bb50bcc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -0,0 +1,105 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TILE_CACHE_H +#define SP_TILE_CACHE_H + +#define TILE_CLEAR_OPTIMIZATION 1 + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. + */ +#define TILE_SIZE 64 + + + +struct softpipe_cached_tile +{ + int x, y; /**< pos of tile in window coords */ + int z, face, level; /**< Extra texture indexes */ + union { + float color[TILE_SIZE][TILE_SIZE][4]; + uint color32[TILE_SIZE][TILE_SIZE]; + uint depth32[TILE_SIZE][TILE_SIZE]; + ushort depth16[TILE_SIZE][TILE_SIZE]; + ubyte stencil8[TILE_SIZE][TILE_SIZE]; + ubyte any[1]; + } data; +}; + + +extern struct softpipe_tile_cache * +sp_create_tile_cache( struct pipe_screen *screen ); + +extern void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *sps); + +extern struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, + struct pipe_texture *texture); + +extern void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue); + +extern struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y); + +extern const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level); + + +#endif /* SP_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h new file mode 100644 index 0000000000..4ab666486c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* This is the interface that softpipe requires any window system + * hosting it to implement. This is the only include file in softpipe + * which is public. + */ + + +#ifndef SP_WINSYS_H +#define SP_WINSYS_H + + +#include "pipe/p_compiler.h" /* for boolean */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +enum pipe_format; + +struct softpipe_winsys { + /** test if the given format is supported for front/back color bufs */ + boolean (*is_format_supported)( struct softpipe_winsys *sws, + enum pipe_format format ); + +}; + +struct pipe_screen; +struct pipe_winsys; +struct pipe_context; + + +struct pipe_context *softpipe_create( struct pipe_screen *, + struct pipe_winsys *, + void *unused ); + + +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *); + + +#ifdef __cplusplus +} +#endif + +#endif /* SP_WINSYS_H */ diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README new file mode 100644 index 0000000000..f0e1cd596d --- /dev/null +++ b/src/gallium/drivers/trace/README @@ -0,0 +1,64 @@ + TRACE PIPE DRIVER + + += About = + +This directory contains a Gallium3D pipe driver which traces all incoming calls. + + += Build Instructions = + +To build, invoke scons on the top dir as + + scons statetrackers=mesa drivers=softpipe,i965simple,trace winsys=xlib + + += Usage = + +To use do + + ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1 + export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/gallium/winsys/xlib + +ensure the right libGL.so is being picked by doing + + ldd progs/trivial/tri + +and then try running + + GALLIUM_TRACE=tri.trace progs/trivial/tri + +which should create a tri.trace file, which is an XML file. You can view copying +trace.xsl to the same directory, and opening with a XSLT capable browser such as +Firefox or Internet Explorer. + + += Integrating = + +You can integrate the trace pipe driver either inside the state tracker or the +winsys. The procedure on both cases is the same. 
Let's assume you have a
+pipe_screen and a pipe_context pair obtained by the usual means (variable and
+function names are just for illustration purposes):
+
+  real_screen = real_screen_create(...);
+
+  real_context = real_context_create(...);
+
+The trace screen and trace context are then created by doing
+
+  trace_screen = trace_screen_create(real_screen);
+
+  trace_context = trace_context_create(trace_screen, real_context);
+
+You can then simply use trace_screen and trace_context instead of real_screen
+and real_context.
+
+Do not call trace_winsys_create. Simply pass trace_screen->winsys or
+trace_context->winsys in places you would pass winsys.
+
+You can create as many contexts as you wish. Just ensure that you don't mistake
+trace_screen for real_screen when creating them.
+
+
+--
+Jose Fonseca <jrfonseca@tungstengraphics.com>
diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript
new file mode 100644
index 0000000000..0a6bfb8f4c
--- /dev/null
+++ b/src/gallium/drivers/trace/SConscript
@@ -0,0 +1,16 @@
+Import('*')
+
+env = env.Clone()
+
+trace = env.ConvenienceLibrary(
+	target = 'trace',
+	source = [
+		'tr_context.c',
+		'tr_dump.c',
+		'tr_screen.c',
+		'tr_state.c',
+		'tr_texture.c',
+		'tr_winsys.c',
+	])
+
+Export('trace')
\ No newline at end of file diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c new file mode 100644 index 0000000000..f0d51ad82e --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.c @@ -0,0 +1,1072 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_screen.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" +#include "tr_winsys.h" +#include "tr_context.h" + + +static INLINE struct pipe_texture * +trace_texture_unwrap(struct trace_context *tr_ctx, + struct pipe_texture *texture) +{ + struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen); + struct trace_texture *tr_tex; + + if(!texture) + return NULL; + + tr_tex = trace_texture(tr_scr, texture); + + assert(tr_tex->texture); + assert(tr_tex->texture->screen == tr_scr->screen); + return tr_tex->texture; +} + + +static INLINE struct pipe_surface * +trace_surface_unwrap(struct trace_context *tr_ctx, + struct pipe_surface *surface) +{ + struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen); + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + + if(!surface) + return NULL; + + assert(surface->texture); + if(!surface->texture) + return surface; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + + assert(tr_surf->surface); + assert(tr_surf->surface->texture->screen == tr_scr->screen); + return tr_surf->surface; +} + + +static INLINE void +trace_context_set_edgeflags(struct pipe_context *_pipe, + const unsigned *bitfield) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_edgeflags"); + + trace_dump_arg(ptr, pipe); + /* FIXME: we don't know how big this array is */ + trace_dump_arg(ptr, bitfield); + + pipe->set_edgeflags(pipe, bitfield);; + + trace_dump_call_end(); +} + + +static INLINE boolean +trace_context_draw_arrays(struct pipe_context *_pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + 
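+   /* Dump the call header and its arguments, invoke the wrapped context,
+    * then record the returned value before closing the call element.
+    */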
+ trace_dump_call_begin("pipe_context", "draw_arrays"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); + + result = pipe->draw_arrays(pipe, mode, start, count);; + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE boolean +trace_context_draw_elements(struct pipe_context *_pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + + trace_dump_call_begin("pipe_context", "draw_elements"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, indexBuffer); + trace_dump_arg(uint, indexSize); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); + + result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE boolean +trace_context_draw_range_elements(struct pipe_context *_pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + + trace_dump_call_begin("pipe_context", "draw_range_elements"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, indexBuffer); + trace_dump_arg(uint, indexSize); + trace_dump_arg(uint, minIndex); + trace_dump_arg(uint, maxIndex); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); + + result = pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE struct pipe_query * +trace_context_create_query(struct pipe_context *_pipe, + unsigned query_type) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_query *result; + + trace_dump_call_begin("pipe_context", "create_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, query_type); + + result = pipe->create_query(pipe, query_type);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_destroy_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "destroy_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); + + pipe->destroy_query(pipe, query);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_begin_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "begin_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); + + pipe->begin_query(pipe, query);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_end_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct 
pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "end_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); + + pipe->end_query(pipe, query); + + trace_dump_call_end(); +} + + +static INLINE boolean +trace_context_get_query_result(struct pipe_context *_pipe, + struct pipe_query *query, + boolean wait, + uint64_t *presult) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + uint64_t result; + boolean _result; + + trace_dump_call_begin("pipe_context", "get_query_result"); + + trace_dump_arg(ptr, pipe); + + _result = pipe->get_query_result(pipe, query, wait, presult);; + result = *presult; + + trace_dump_arg(uint, result); + trace_dump_ret(bool, _result); + + trace_dump_call_end(); + + return _result; +} + + +static INLINE void * +trace_context_create_blend_state(struct pipe_context *_pipe, + const struct pipe_blend_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_blend_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(blend_state, state); + + result = pipe->create_blend_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_blend_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_blend_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_blend_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_blend_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_blend_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_blend_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_sampler_state(struct pipe_context *_pipe, + const struct pipe_sampler_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_sampler_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(sampler_state, state); + + result = pipe->create_sampler_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_sampler_states(struct pipe_context *_pipe, + unsigned num_states, void **states) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_sampler_states"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_states); + trace_dump_arg_array(ptr, states, num_states); + + pipe->bind_sampler_states(pipe, num_states, states);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_sampler_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_sampler_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_sampler_state(pipe, state);; + + trace_dump_call_end(); +} 
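+
+/*
+ * Note: every wrapper in this file follows the same delegate-and-dump shape:
+ * open a <call> element naming the pipe_context method, dump each argument,
+ * forward the call to the wrapped context, dump any return value, and close
+ * the element.  Roughly (a sketch of the recurring pattern, not an extra
+ * entry point in the driver):
+ *
+ *    trace_dump_call_begin("pipe_context", "method");
+ *    trace_dump_arg(ptr, pipe);
+ *    ...dump the remaining arguments...
+ *    result = pipe->method(pipe, ...);
+ *    trace_dump_ret(ptr, result);
+ *    trace_dump_call_end();
+ */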
+ + +static INLINE void * +trace_context_create_rasterizer_state(struct pipe_context *_pipe, + const struct pipe_rasterizer_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_rasterizer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(rasterizer_state, state); + + result = pipe->create_rasterizer_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_rasterizer_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_rasterizer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_rasterizer_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_rasterizer_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_rasterizer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_rasterizer_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); + + result = pipe->create_depth_stencil_alpha_state(pipe, state);; + + trace_dump_arg(ptr, pipe); + trace_dump_arg(depth_stencil_alpha_state, state); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_depth_stencil_alpha_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_depth_stencil_alpha_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_depth_stencil_alpha_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_depth_stencil_alpha_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_fs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_fs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(shader_state, state); + + result = pipe->create_fs_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_fs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + 
trace_dump_call_begin("pipe_context", "bind_fs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_fs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_fs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_fs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_fs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_vs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_vs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(shader_state, state); + + result = pipe->create_vs_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_vs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_vs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_vs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_vs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_vs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_vs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_blend_color(struct pipe_context *_pipe, + const struct pipe_blend_color *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_blend_color"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(blend_color, state); + + pipe->set_blend_color(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_clip_state(struct pipe_context *_pipe, + const struct pipe_clip_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_clip_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(clip_state, state); + + pipe->set_clip_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_constant_buffer(struct pipe_context *_pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buffer) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_winsys_user_buffer_update(_pipe->winsys, (struct pipe_buffer *)buffer); + + trace_dump_call_begin("pipe_context", "set_constant_buffer"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, shader); + trace_dump_arg(uint, index); + trace_dump_arg(constant_buffer, buffer); + + pipe->set_constant_buffer(pipe, shader, index, buffer);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_framebuffer_state(struct pipe_context *_pipe, + const struct pipe_framebuffer_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + 
struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_framebuffer_state unwrapped_state; + unsigned i; + + /* Unwrap the input state */ + memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); + for(i = 0; i < state->num_cbufs; ++i) + unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); + for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + unwrapped_state.cbufs[i] = NULL; + unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); + state = &unwrapped_state; + + trace_dump_call_begin("pipe_context", "set_framebuffer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(framebuffer_state, state); + + pipe->set_framebuffer_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_polygon_stipple(struct pipe_context *_pipe, + const struct pipe_poly_stipple *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_polygon_stipple"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(poly_stipple, state); + + pipe->set_polygon_stipple(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_scissor_state(struct pipe_context *_pipe, + const struct pipe_scissor_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_scissor_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(scissor_state, state); + + pipe->set_scissor_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_viewport_state(struct pipe_context *_pipe, + const struct pipe_viewport_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_viewport_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(viewport_state, state); + + pipe->set_viewport_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_SAMPLERS]; + unsigned i; + + for(i = 0; i < num_textures; ++i) + unwrapped_textures[i] = trace_texture_unwrap(tr_ctx, textures[i]); + textures = unwrapped_textures; + + trace_dump_call_begin("pipe_context", "set_sampler_textures"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_textures); + trace_dump_arg_array(ptr, textures, num_textures); + + pipe->set_sampler_textures(pipe, num_textures, textures);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_buffers(struct pipe_context *_pipe, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + unsigned i; + + for(i = 0; i < num_buffers; ++i) + trace_winsys_user_buffer_update(_pipe->winsys, buffers[i].buffer); + + trace_dump_call_begin("pipe_context", "set_vertex_buffers"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_buffers); + + trace_dump_arg_begin("buffers"); + trace_dump_struct_array(vertex_buffer, buffers, num_buffers); + trace_dump_arg_end(); + + pipe->set_vertex_buffers(pipe, num_buffers, buffers);; + + trace_dump_call_end(); +} + + +static INLINE void 
+trace_context_set_vertex_elements(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_vertex_elements"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_elements); + + trace_dump_arg_begin("elements"); + trace_dump_struct_array(vertex_element, elements, num_elements); + trace_dump_arg_end(); + + pipe->set_vertex_elements(pipe, num_elements, elements);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_surface_copy(struct pipe_context *_pipe, + boolean do_flip, + struct pipe_surface *dest, + unsigned destx, unsigned desty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + dest = trace_surface_unwrap(tr_ctx, dest); + src = trace_surface_unwrap(tr_ctx, src); + + trace_dump_call_begin("pipe_context", "surface_copy"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(bool, do_flip); + trace_dump_arg(ptr, dest); + trace_dump_arg(uint, destx); + trace_dump_arg(uint, desty); + trace_dump_arg(ptr, src); + trace_dump_arg(uint, srcx); + trace_dump_arg(uint, srcy); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + + pipe->surface_copy(pipe, do_flip, + dest, destx, desty, + src, srcx, srcy, width, height); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_surface_fill(struct pipe_context *_pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + dst = trace_surface_unwrap(tr_ctx, dst); + + trace_dump_call_begin("pipe_context", "surface_fill"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, dst); + trace_dump_arg(uint, dstx); + trace_dump_arg(uint, dsty); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + + pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_clear(struct pipe_context *_pipe, + struct pipe_surface *surface, + unsigned clearValue) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + surface = trace_surface_unwrap(tr_ctx, surface); + + trace_dump_call_begin("pipe_context", "clear"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, clearValue); + + pipe->clear(pipe, surface, clearValue);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_flush(struct pipe_context *_pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "flush"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, flags); + + pipe->flush(pipe, flags, fence);; + + if(fence) + trace_dump_ret(ptr, *fence); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_destroy(struct pipe_context *_pipe) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "destroy"); + + trace_dump_arg(ptr, pipe); + + pipe->destroy(pipe); + + trace_dump_call_end(); + + FREE(tr_ctx); +} + + +struct pipe_context * 
+trace_context_create(struct pipe_screen *screen, + struct pipe_context *pipe) +{ + struct trace_context *tr_ctx; + + if(!pipe) + goto error1; + + if(!trace_dump_enabled()) + goto error1; + + tr_ctx = CALLOC_STRUCT(trace_context); + if(!tr_ctx) + goto error1; + + tr_ctx->base.winsys = screen->winsys; + tr_ctx->base.screen = screen; + tr_ctx->base.destroy = trace_context_destroy; + tr_ctx->base.set_edgeflags = trace_context_set_edgeflags; + tr_ctx->base.draw_arrays = trace_context_draw_arrays; + tr_ctx->base.draw_elements = trace_context_draw_elements; + tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; + tr_ctx->base.create_query = trace_context_create_query; + tr_ctx->base.destroy_query = trace_context_destroy_query; + tr_ctx->base.begin_query = trace_context_begin_query; + tr_ctx->base.end_query = trace_context_end_query; + tr_ctx->base.get_query_result = trace_context_get_query_result; + tr_ctx->base.create_blend_state = trace_context_create_blend_state; + tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; + tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; + tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; + tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; + tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; + tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; + tr_ctx->base.delete_rasterizer_state = trace_context_delete_rasterizer_state; + tr_ctx->base.create_depth_stencil_alpha_state = trace_context_create_depth_stencil_alpha_state; + tr_ctx->base.bind_depth_stencil_alpha_state = trace_context_bind_depth_stencil_alpha_state; + tr_ctx->base.delete_depth_stencil_alpha_state = trace_context_delete_depth_stencil_alpha_state; + tr_ctx->base.create_fs_state = trace_context_create_fs_state; + tr_ctx->base.bind_fs_state = trace_context_bind_fs_state; + tr_ctx->base.delete_fs_state = trace_context_delete_fs_state; + tr_ctx->base.create_vs_state = trace_context_create_vs_state; + tr_ctx->base.bind_vs_state = trace_context_bind_vs_state; + tr_ctx->base.delete_vs_state = trace_context_delete_vs_state; + tr_ctx->base.set_blend_color = trace_context_set_blend_color; + tr_ctx->base.set_clip_state = trace_context_set_clip_state; + tr_ctx->base.set_constant_buffer = trace_context_set_constant_buffer; + tr_ctx->base.set_framebuffer_state = trace_context_set_framebuffer_state; + tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; + tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; + tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; + tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; + tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; + tr_ctx->base.surface_copy = trace_context_surface_copy; + tr_ctx->base.surface_fill = trace_context_surface_fill; + tr_ctx->base.clear = trace_context_clear; + tr_ctx->base.flush = trace_context_flush; + + tr_ctx->pipe = pipe; + + trace_dump_call_begin("", "pipe_context_create"); + trace_dump_arg_begin("screen"); + trace_dump_ptr(pipe->screen); + trace_dump_arg_end(); + trace_dump_ret(ptr, pipe); + trace_dump_call_end(); + + return &tr_ctx->base; + +error1: + return pipe; +} diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h new file mode 100644 index 
0000000000..7831900ec2 --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_CONTEXT_H_ +#define TR_CONTEXT_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_context.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct trace_context +{ + struct pipe_context base; + + struct pipe_context *pipe; +}; + + +static INLINE struct trace_context * +trace_context(struct pipe_context *pipe) +{ + assert(pipe); + return (struct trace_context *)pipe; +} + + + +struct pipe_context * +trace_context_create(struct pipe_screen *screen, + struct pipe_context *pipe); + + +#ifdef __cplusplus +} +#endif + +#endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c new file mode 100644 index 0000000000..a0ead0ded3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.c @@ -0,0 +1,404 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Trace dumping functions. + * + * For now we just use standard XML for dumping the trace calls, as this is + * simple to write, parse, and visually inspect, but the actual representation + * is abstracted out of this file, so that we can switch to a binary + * representation if/when it becomes justified. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <stdlib.h> +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_stream.h" + +#include "tr_dump.h" + + +static struct util_stream *stream = NULL; +static unsigned refcount = 0; + + +static INLINE void +trace_dump_write(const char *buf, size_t size) +{ + if(stream) + util_stream_write(stream, buf, size); +} + + +static INLINE void +trace_dump_writes(const char *s) +{ + trace_dump_write(s, strlen(s)); +} + + +static INLINE void +trace_dump_writef(const char *format, ...) +{ + static char buf[1024]; + unsigned len; + va_list ap; + va_start(ap, format); + len = util_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + trace_dump_write(buf, len); +} + + +static INLINE void +trace_dump_escape(const char *str) +{ + const unsigned char *p = (const unsigned char *)str; + unsigned char c; + while((c = *p++) != 0) { + if(c == '<') + trace_dump_writes("<"); + else if(c == '>') + trace_dump_writes(">"); + else if(c == '&') + trace_dump_writes("&"); + else if(c == '\'') + trace_dump_writes("'"); + else if(c == '\"') + trace_dump_writes("""); + else if(c >= 0x20 && c <= 0x7e) + trace_dump_writef("%c", c); + else + trace_dump_writef("&#%u;", c); + } +} + + +static INLINE void +trace_dump_indent(unsigned level) +{ + unsigned i; + for(i = 0; i < level; ++i) + trace_dump_writes("\t"); +} + + +static INLINE void +trace_dump_newline(void) +{ + trace_dump_writes("\n"); +} + + +static INLINE void +trace_dump_tag(const char *name) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes("/>"); +} + + +static INLINE void +trace_dump_tag_begin(const char *name) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(">"); +} + +static INLINE void +trace_dump_tag_begin1(const char *name, + const char *attr1, const char *value1) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("='"); + trace_dump_escape(value1); + trace_dump_writes("'>"); +} + + +static INLINE void +trace_dump_tag_begin2(const char *name, + const char *attr1, const char *value1, + const char *attr2, const char *value2) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("=\'"); + trace_dump_escape(value1); + trace_dump_writes("\' "); + trace_dump_writes(attr2); + trace_dump_writes("=\'"); + trace_dump_escape(value2); + trace_dump_writes("\'>"); +} + + +static INLINE void +trace_dump_tag_begin3(const char *name, + const char *attr1, const char *value1, + const char *attr2, const char *value2, + const char *attr3, const char *value3) +{ + 
trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("=\'"); + trace_dump_escape(value1); + trace_dump_writes("\' "); + trace_dump_writes(attr2); + trace_dump_writes("=\'"); + trace_dump_escape(value2); + trace_dump_writes("\' "); + trace_dump_writes(attr3); + trace_dump_writes("=\'"); + trace_dump_escape(value3); + trace_dump_writes("\'>"); +} + + +static INLINE void +trace_dump_tag_end(const char *name) +{ + trace_dump_writes("</"); + trace_dump_writes(name); + trace_dump_writes(">"); +} + +static void +trace_dump_trace_close(void) +{ + if(stream) { + trace_dump_writes("</trace>\n"); + util_stream_close(stream); + stream = NULL; + refcount = 0; + } +} + +boolean trace_dump_trace_begin() +{ + const char *filename; + + filename = debug_get_option("GALLIUM_TRACE", NULL); + if(!filename) + return FALSE; + + if(!stream) { + + stream = util_stream_create(filename, 0); + if(!stream) + return FALSE; + + trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n"); + trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); + trace_dump_writes("<trace version='0.1'>\n"); + +#if defined(PIPE_OS_LINUX) + /* Linux applications rarely cleanup GL / Gallium resources so catch + * application exit here */ + atexit(trace_dump_trace_close); +#endif + } + + ++refcount; + + return TRUE; +} + +boolean trace_dump_enabled(void) +{ + return stream ? TRUE : FALSE; +} + +void trace_dump_trace_end(void) +{ + if(stream) + if(!--refcount) + trace_dump_trace_close(); +} + +void trace_dump_call_begin(const char *klass, const char *method) +{ + trace_dump_indent(1); + trace_dump_tag_begin2("call", "class", klass, "method", method); + trace_dump_newline(); +} + +void trace_dump_call_end(void) +{ + trace_dump_indent(1); + trace_dump_tag_end("call"); + trace_dump_newline(); + util_stream_flush(stream); +} + +void trace_dump_arg_begin(const char *name) +{ + trace_dump_indent(2); + trace_dump_tag_begin1("arg", "name", name); +} + +void trace_dump_arg_end(void) +{ + trace_dump_tag_end("arg"); + trace_dump_newline(); +} + +void trace_dump_ret_begin(void) +{ + trace_dump_indent(2); + trace_dump_tag_begin("ret"); +} + +void trace_dump_ret_end(void) +{ + trace_dump_tag_end("ret"); + trace_dump_newline(); +} + +void trace_dump_bool(int value) +{ + trace_dump_writef("<bool>%c</bool>", value ? 
'1' : '0'); +} + +void trace_dump_int(long long int value) +{ + trace_dump_writef("<int>%lli</int>", value); +} + +void trace_dump_uint(long long unsigned value) +{ + trace_dump_writef("<uint>%llu</uint>", value); +} + +void trace_dump_float(double value) +{ + trace_dump_writef("<float>%g</float>", value); +} + +void trace_dump_bytes(const void *data, + long unsigned size) +{ + static const char hex_table[16] = "0123456789ABCDEF"; + const uint8_t *p = data; + long unsigned i; + trace_dump_writes("<bytes>"); + for(i = 0; i < size; ++i) { + uint8_t byte = *p++; + char hex[2]; + hex[0] = hex_table[byte >> 4]; + hex[1] = hex_table[byte & 0xf]; + trace_dump_write(hex, 2); + } + trace_dump_writes("</bytes>"); +} + +void trace_dump_string(const char *str) +{ + trace_dump_writes("<string>"); + trace_dump_escape(str); + trace_dump_writes("</string>"); +} + +void trace_dump_enum(const char *value) +{ + trace_dump_writes("<enum>"); + trace_dump_escape(value); + trace_dump_writes("</enum>"); +} + +void trace_dump_array_begin(void) +{ + trace_dump_writes("<array>"); +} + +void trace_dump_array_end(void) +{ + trace_dump_writes("</array>"); +} + +void trace_dump_elem_begin(void) +{ + trace_dump_writes("<elem>"); +} + +void trace_dump_elem_end(void) +{ + trace_dump_writes("</elem>"); +} + +void trace_dump_struct_begin(const char *name) +{ + trace_dump_writef("<struct name='%s'>", name); +} + +void trace_dump_struct_end(void) +{ + trace_dump_writes("</struct>"); +} + +void trace_dump_member_begin(const char *name) +{ + trace_dump_writef("<member name='%s'>", name); +} + +void trace_dump_member_end(void) +{ + trace_dump_writes("</member>"); +} + +void trace_dump_null(void) +{ + trace_dump_writes("<null/>"); +} + +void trace_dump_ptr(const void *value) +{ + if(value) + trace_dump_writef("<ptr>0x%08lx</ptr>", (unsigned long)(uintptr_t)value); + else + trace_dump_null(); +} diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h new file mode 100644 index 0000000000..76a53731b3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.h @@ -0,0 +1,132 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Trace data dumping primitives. 
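+ *
+ * A dumped call in the resulting XML looks roughly like (illustrative
+ * sketch; the real output is tab-indented and lists every argument):
+ *
+ *    <call class='pipe_context' method='draw_arrays'>
+ *      <arg name='mode'><uint>4</uint></arg>
+ *      <ret><bool>1</bool></ret>
+ *    </call>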
+ */ + +#ifndef TR_DUMP_H +#define TR_DUMP_H + + +#include "pipe/p_compiler.h" + + +boolean trace_dump_trace_begin(void); +boolean trace_dump_enabled(void); +void trace_dump_trace_end(void); +void trace_dump_call_begin(const char *klass, const char *method); +void trace_dump_call_end(void); +void trace_dump_arg_begin(const char *name); +void trace_dump_arg_end(void); +void trace_dump_ret_begin(void); +void trace_dump_ret_end(void); +void trace_dump_bool(int value); +void trace_dump_int(long long int value); +void trace_dump_uint(long long unsigned value); +void trace_dump_float(double value); +void trace_dump_bytes(const void *data, long unsigned size); +void trace_dump_string(const char *str); +void trace_dump_enum(const char *value); +void trace_dump_array_begin(void); +void trace_dump_array_end(void); +void trace_dump_elem_begin(void); +void trace_dump_elem_end(void); +void trace_dump_struct_begin(const char *name); +void trace_dump_struct_end(void); +void trace_dump_member_begin(const char *name); +void trace_dump_member_end(void); +void trace_dump_null(void); +void trace_dump_ptr(const void *value); + + +/* + * Code saving macros. + */ + +#define trace_dump_arg(_type, _arg) \ + do { \ + trace_dump_arg_begin(#_arg); \ + trace_dump_##_type(_arg); \ + trace_dump_arg_end(); \ + } while(0) + +#define trace_dump_ret(_type, _arg) \ + do { \ + trace_dump_ret_begin(); \ + trace_dump_##_type(_arg); \ + trace_dump_ret_end(); \ + } while(0) + +#define trace_dump_array(_type, _obj, _size) \ + do { \ + unsigned long idx; \ + trace_dump_array_begin(); \ + for(idx = 0; idx < (_size); ++idx) { \ + trace_dump_elem_begin(); \ + trace_dump_##_type((_obj)[idx]); \ + trace_dump_elem_end(); \ + } \ + trace_dump_array_end(); \ + } while(0) + +#define trace_dump_struct_array(_type, _obj, _size) \ + do { \ + unsigned long idx; \ + trace_dump_array_begin(); \ + for(idx = 0; idx < (_size); ++idx) { \ + trace_dump_elem_begin(); \ + trace_dump_##_type(&(_obj)[idx]); \ + trace_dump_elem_end(); \ + } \ + trace_dump_array_end(); \ + } while(0) + +#define trace_dump_member(_type, _obj, _member) \ + do { \ + trace_dump_member_begin(#_member); \ + trace_dump_##_type((_obj)->_member); \ + trace_dump_member_end(); \ + } while(0) + +#define trace_dump_arg_array(_type, _arg, _size) \ + do { \ + trace_dump_arg_begin(#_arg); \ + trace_dump_array(_type, _arg, _size); \ + trace_dump_arg_end(); \ + } while(0) + +#define trace_dump_member_array(_type, _obj, _member) \ + do { \ + trace_dump_member_begin(#_member); \ + trace_dump_array(_type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ + trace_dump_member_end(); \ + } while(0) + + +#endif /* TR_DUMP_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c new file mode 100644 index 0000000000..8789f86b1a --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_winsys.h" +#include "tr_texture.h" +#include "tr_screen.h" + + +static const char * +trace_screen_get_name(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + const char *result; + + trace_dump_call_begin("pipe_screen", "get_name"); + + trace_dump_arg(ptr, screen); + + result = screen->get_name(screen); + + trace_dump_ret(string, result); + + trace_dump_call_end(); + + return result; +} + + +static const char * +trace_screen_get_vendor(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + const char *result; + + trace_dump_call_begin("pipe_screen", "get_vendor"); + + trace_dump_arg(ptr, screen); + + result = screen->get_vendor(screen); + + trace_dump_ret(string, result); + + trace_dump_call_end(); + + return result; +} + + +static int +trace_screen_get_param(struct pipe_screen *_screen, + int param) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + int result; + + trace_dump_call_begin("pipe_screen", "get_param"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); + + result = screen->get_param(screen, param); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static float +trace_screen_get_paramf(struct pipe_screen *_screen, + int param) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + float result; + + trace_dump_call_begin("pipe_screen", "get_paramf"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); + + result = screen->get_paramf(screen, param); + + trace_dump_ret(float, result); + + trace_dump_call_end(); + + return result; +} + + +static boolean +trace_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + boolean result; + + trace_dump_call_begin("pipe_screen", "is_format_supported"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(format, 
format); + trace_dump_arg(int, target); + trace_dump_arg(uint, tex_usage); + trace_dump_arg(uint, geom_flags); + + result = screen->is_format_supported(screen, format, target, tex_usage, geom_flags); + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static struct pipe_texture * +trace_screen_texture_create(struct pipe_screen *_screen, + const struct pipe_texture *templat) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct pipe_texture *result; + + trace_dump_call_begin("pipe_screen", "texture_create"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(template, templat); + + result = screen->texture_create(screen, templat); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_texture_create(tr_scr, result); + + return result; +} + + +static struct pipe_texture * +trace_screen_texture_blanket(struct pipe_screen *_screen, + const struct pipe_texture *templat, + const unsigned *ppitch, + struct pipe_buffer *buffer) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + unsigned pitch = *ppitch; + struct pipe_texture *result; + + trace_dump_call_begin("pipe_screen", "texture_blanket"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(template, templat); + trace_dump_arg(uint, pitch); + trace_dump_arg(ptr, buffer); + + result = screen->texture_blanket(screen, templat, ppitch, buffer); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_texture_create(tr_scr, result); + + return result; +} + + +static void +trace_screen_texture_release(struct pipe_screen *_screen, + struct pipe_texture **ptexture) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct pipe_texture *texture; + + assert(ptexture); + if(*ptexture) { + tr_tex = trace_texture(tr_scr, *ptexture); + texture = tr_tex->texture; + assert(texture->screen == screen); + } + else + texture = NULL; + + if (*ptexture) { + if (!--(*ptexture)->refcount) { + trace_dump_call_begin("pipe_screen", "texture_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + + trace_texture_destroy(tr_scr, *ptexture); + + trace_dump_call_end(); + } + + *ptexture = NULL; + } +} + + +static struct pipe_surface * +trace_screen_get_tex_surface(struct pipe_screen *_screen, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct pipe_surface *result; + + assert(texture); + tr_tex = trace_texture(tr_scr, texture); + texture = tr_tex->texture; + assert(texture->screen == screen); + + trace_dump_call_begin("pipe_screen", "get_tex_surface"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + trace_dump_arg(uint, face); + trace_dump_arg(uint, level); + trace_dump_arg(uint, zslice); + trace_dump_arg(uint, usage); + + result = screen->get_tex_surface(screen, texture, face, level, zslice, usage); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_surface_create(tr_tex, result); + + return result; +} + + +static void +trace_screen_tex_surface_release(struct pipe_screen *_screen, + struct pipe_surface **psurface) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct 
trace_texture *tr_tex; + struct trace_surface *tr_surf; + struct pipe_surface *surface; + + assert(psurface); + if(*psurface) { + tr_tex = trace_texture(tr_scr, (*psurface)->texture); + tr_surf = trace_surface(tr_tex, *psurface); + surface = tr_surf->surface; + } + else + surface = NULL; + + if (*psurface) { + if (!--(*psurface)->refcount) { + trace_dump_call_begin("pipe_screen", "tex_surface_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, surface); + + trace_surface_destroy(tr_tex, *psurface); + + trace_dump_call_end(); + } + + *psurface = NULL; + } +} + + +static void * +trace_screen_surface_map(struct pipe_screen *_screen, + struct pipe_surface *surface, + unsigned flags) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + void *map; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + + map = screen->surface_map(screen, surface, flags); + if(map) { + if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(!tr_surf->map); + tr_surf->map = map; + } + } + + return map; +} + + +static void +trace_screen_surface_unmap(struct pipe_screen *_screen, + struct pipe_surface *surface) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + + if(tr_surf->map) { + size_t size = surface->nblocksy * surface->stride; + + trace_dump_call_begin("pipe_winsys", "surface_write"); + + trace_dump_arg(ptr, screen); + + trace_dump_arg(ptr, surface); + + trace_dump_arg_begin("data"); + trace_dump_bytes(tr_surf->map, size); + trace_dump_arg_end(); + + trace_dump_arg_begin("stride"); + trace_dump_uint(surface->stride); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + tr_surf->map = NULL; + } + + screen->surface_unmap(screen, surface); +} + + +static void +trace_screen_destroy(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "destroy"); + + trace_dump_arg(ptr, screen); + + screen->destroy(screen); + + trace_dump_call_end(); + + trace_dump_trace_end(); + + FREE(tr_scr); +} + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen) +{ + struct trace_screen *tr_scr; + struct pipe_winsys *winsys; + + if(!screen) + goto error1; + + if(!trace_dump_trace_begin()) + goto error1; + + tr_scr = CALLOC_STRUCT(trace_screen); + if(!tr_scr) + goto error2; + + winsys = trace_winsys_create(screen->winsys); + if(!winsys) + goto error3; + + tr_scr->base.winsys = winsys; + tr_scr->base.destroy = trace_screen_destroy; + tr_scr->base.get_name = trace_screen_get_name; + tr_scr->base.get_vendor = trace_screen_get_vendor; + tr_scr->base.get_param = trace_screen_get_param; + tr_scr->base.get_paramf = trace_screen_get_paramf; + tr_scr->base.is_format_supported = trace_screen_is_format_supported; + tr_scr->base.texture_create = trace_screen_texture_create; + tr_scr->base.texture_blanket = trace_screen_texture_blanket; + tr_scr->base.texture_release = trace_screen_texture_release; + tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; + 
tr_scr->base.tex_surface_release = trace_screen_tex_surface_release; + tr_scr->base.surface_map = trace_screen_surface_map; + tr_scr->base.surface_unmap = trace_screen_surface_unmap; + + tr_scr->screen = screen; + + trace_dump_call_begin("", "pipe_screen_create"); + trace_dump_arg_begin("winsys"); + trace_dump_ptr(screen->winsys); + trace_dump_arg_end(); + trace_dump_ret(ptr, screen); + trace_dump_call_end(); + + return &tr_scr->base; + +error3: + FREE(tr_scr); +error2: + trace_dump_trace_end(); +error1: + return screen; +} + + +struct trace_screen * +trace_screen(struct pipe_screen *screen) +{ + assert(screen); + assert(screen->destroy == trace_screen_destroy); + return (struct trace_screen *)screen; +} diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h new file mode 100644 index 0000000000..93fefdb9a5 --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_SCREEN_H_ +#define TR_SCREEN_H_ + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct trace_screen +{ + struct pipe_screen base; + + struct pipe_screen *screen; +}; + + +struct trace_screen * +trace_screen(struct pipe_screen *screen); + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen); + + +#ifdef __cplusplus +} +#endif + +#endif /* TR_SCREEN_H_ */ diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c new file mode 100644 index 0000000000..986d939e0c --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.c @@ -0,0 +1,464 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" + +#include "tr_dump.h" +#include "tr_state.h" + + +void trace_dump_format(enum pipe_format format) +{ + trace_dump_enum(pf_name(format) ); +} + + +void trace_dump_block(const struct pipe_format_block *block) +{ + trace_dump_struct_begin("pipe_format_block"); + trace_dump_member(uint, block, size); + trace_dump_member(uint, block, width); + trace_dump_member(uint, block, height); + trace_dump_struct_end(); +} + + +void trace_dump_template(const struct pipe_texture *templat) +{ + if(!templat) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_texture"); + + trace_dump_member(int, templat, target); + trace_dump_member(format, templat, format); + + trace_dump_member_begin("width"); + trace_dump_array(uint, templat->width, 1); + trace_dump_member_end(); + + trace_dump_member_begin("height"); + trace_dump_array(uint, templat->height, 1); + trace_dump_member_end(); + + trace_dump_member_begin("depth"); + trace_dump_array(uint, templat->depth, 1); + trace_dump_member_end(); + + trace_dump_member_begin("block"); + trace_dump_block(&templat->block); + trace_dump_member_end(); + + trace_dump_member(uint, templat, last_level); + trace_dump_member(uint, templat, tex_usage); + + trace_dump_struct_end(); +} + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_rasterizer_state"); + + trace_dump_member(bool, state, flatshade); + trace_dump_member(bool, state, light_twoside); + trace_dump_member(uint, state, front_winding); + trace_dump_member(uint, state, cull_mode); + trace_dump_member(uint, state, fill_cw); + trace_dump_member(uint, state, fill_ccw); + trace_dump_member(bool, state, offset_cw); + trace_dump_member(bool, state, offset_ccw); + trace_dump_member(bool, state, scissor); + trace_dump_member(bool, state, poly_smooth); + trace_dump_member(bool, state, poly_stipple_enable); + trace_dump_member(bool, state, point_smooth); + trace_dump_member(bool, state, point_sprite); + trace_dump_member(bool, state, point_size_per_vertex); + trace_dump_member(bool, state, multisample); + trace_dump_member(bool, state, line_smooth); + trace_dump_member(bool, state, line_stipple_enable); + 
trace_dump_member(uint, state, line_stipple_factor); + trace_dump_member(uint, state, line_stipple_pattern); + trace_dump_member(bool, state, line_last_pixel); + trace_dump_member(bool, state, bypass_clipping); + trace_dump_member(bool, state, bypass_vs); + trace_dump_member(bool, state, origin_lower_left); + trace_dump_member(bool, state, flatshade_first); + trace_dump_member(bool, state, gl_rasterization_rules); + + trace_dump_member(float, state, line_width); + trace_dump_member(float, state, point_size); + trace_dump_member(float, state, point_size_min); + trace_dump_member(float, state, point_size_max); + trace_dump_member(float, state, offset_units); + trace_dump_member(float, state, offset_scale); + + trace_dump_member_array(uint, state, sprite_coord_mode); + + trace_dump_struct_end(); +} + + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_poly_stipple"); + + trace_dump_member_begin("stipple"); + trace_dump_array(uint, + state->stipple, + Elements(state->stipple)); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_viewport_state(const struct pipe_viewport_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_viewport_state"); + + trace_dump_member_array(float, state, scale); + trace_dump_member_array(float, state, translate); + + trace_dump_struct_end(); +} + + +void trace_dump_scissor_state(const struct pipe_scissor_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_scissor_state"); + + trace_dump_member(uint, state, minx); + trace_dump_member(uint, state, miny); + trace_dump_member(uint, state, maxx); + trace_dump_member(uint, state, maxy); + + trace_dump_struct_end(); +} + + +void trace_dump_clip_state(const struct pipe_clip_state *state) +{ + unsigned i; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_clip_state"); + + trace_dump_member_begin("ucp"); + trace_dump_array_begin(); + for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { + trace_dump_elem_begin(); + trace_dump_array(float, state->ucp[i], 4); + trace_dump_elem_end(); + } + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member(uint, state, nr); + + trace_dump_struct_end(); +} + + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_constant_buffer"); + + trace_dump_member(ptr, state, buffer); + trace_dump_member(uint, state, size); + + trace_dump_struct_end(); +} + + +void trace_dump_shader_state(const struct pipe_shader_state *state) +{ + static char str[8192]; + + if(!state) { + trace_dump_null(); + return; + } + + tgsi_dump_str(state->tokens, 0, str, sizeof(str)); + + trace_dump_struct_begin("pipe_shader_state"); + + trace_dump_member_begin("tokens"); + trace_dump_string(str); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) +{ + unsigned i; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_depth_stencil_alpha_state"); + + trace_dump_member_begin("depth"); + trace_dump_struct_begin("pipe_depth_state"); + trace_dump_member(bool, &state->depth, enabled); + trace_dump_member(bool, &state->depth, writemask); + trace_dump_member(uint, &state->depth, func); + trace_dump_member(bool, 
&state->depth, occlusion_count); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_member_begin("stencil"); + trace_dump_array_begin(); + for(i = 0; i < Elements(state->stencil); ++i) { + trace_dump_elem_begin(); + trace_dump_struct_begin("pipe_stencil_state"); + trace_dump_member(bool, &state->stencil[i], enabled); + trace_dump_member(uint, &state->stencil[i], func); + trace_dump_member(uint, &state->stencil[i], fail_op); + trace_dump_member(uint, &state->stencil[i], zpass_op); + trace_dump_member(uint, &state->stencil[i], zfail_op); + trace_dump_member(uint, &state->stencil[i], ref_value); + trace_dump_member(uint, &state->stencil[i], value_mask); + trace_dump_member(uint, &state->stencil[i], write_mask); + trace_dump_struct_end(); + trace_dump_elem_end(); + } + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member_begin("alpha"); + trace_dump_struct_begin("pipe_alpha_state"); + trace_dump_member(bool, &state->alpha, enabled); + trace_dump_member(uint, &state->alpha, func); + trace_dump_member(float, &state->alpha, ref); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_blend_state(const struct pipe_blend_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_blend_state"); + + trace_dump_member(bool, state, blend_enable); + + trace_dump_member(uint, state, rgb_func); + trace_dump_member(uint, state, rgb_src_factor); + trace_dump_member(uint, state, rgb_dst_factor); + + trace_dump_member(uint, state, alpha_func); + trace_dump_member(uint, state, alpha_src_factor); + trace_dump_member(uint, state, alpha_dst_factor); + + trace_dump_member(bool, state, logicop_enable); + trace_dump_member(uint, state, logicop_func); + + trace_dump_member(uint, state, colormask); + trace_dump_member(bool, state, dither); + + trace_dump_struct_end(); +} + + +void trace_dump_blend_color(const struct pipe_blend_color *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_blend_color"); + + trace_dump_member_array(float, state, color); + + trace_dump_struct_end(); +} + + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) +{ + trace_dump_struct_begin("pipe_framebuffer_state"); + + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + trace_dump_member(uint, state, num_cbufs); + trace_dump_member_array(ptr, state, cbufs); + trace_dump_member(ptr, state, zsbuf); + + trace_dump_struct_end(); +} + + +void trace_dump_sampler_state(const struct pipe_sampler_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_sampler_state"); + + trace_dump_member(uint, state, wrap_s); + trace_dump_member(uint, state, wrap_t); + trace_dump_member(uint, state, wrap_r); + trace_dump_member(uint, state, min_img_filter); + trace_dump_member(uint, state, min_mip_filter); + trace_dump_member(uint, state, mag_img_filter); + trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_func); + trace_dump_member(bool, state, normalized_coords); + trace_dump_member(uint, state, prefilter); + trace_dump_member(float, state, shadow_ambient); + trace_dump_member(float, state, lod_bias); + trace_dump_member(float, state, min_lod); + trace_dump_member(float, state, max_lod); + trace_dump_member_array(float, state, border_color); + trace_dump_member(float, state, max_anisotropy); + + trace_dump_struct_end(); +} + + +void trace_dump_surface(const 
struct pipe_surface *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_surface"); + + trace_dump_member(ptr, state, buffer); + trace_dump_member(format, state, format); + trace_dump_member(uint, state, status); + trace_dump_member(uint, state, clear_value); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + + trace_dump_member_begin("block"); + trace_dump_block(&state->block); + trace_dump_member_end(); + + trace_dump_member(uint, state, nblocksx); + trace_dump_member(uint, state, nblocksy); + trace_dump_member(uint, state, stride); + trace_dump_member(uint, state, layout); + trace_dump_member(uint, state, offset); + trace_dump_member(uint, state, refcount); + trace_dump_member(uint, state, usage); + + trace_dump_member(ptr, state, texture); + trace_dump_member(uint, state, face); + trace_dump_member(uint, state, level); + trace_dump_member(uint, state, zslice); + + trace_dump_struct_end(); +} + + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_vertex_buffer"); + + trace_dump_member(uint, state, pitch); + trace_dump_member(uint, state, max_index); + trace_dump_member(uint, state, buffer_offset); + trace_dump_member(ptr, state, buffer); + + trace_dump_struct_end(); +} + + +void trace_dump_vertex_element(const struct pipe_vertex_element *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_vertex_element"); + + trace_dump_member(uint, state, src_offset); + + trace_dump_member(uint, state, vertex_buffer_index); + trace_dump_member(uint, state, nr_components); + + trace_dump_member(format, state, src_format); + + trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h new file mode 100644 index 0000000000..5ae533dc66 --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TR_STATE_H +#define TR_STATE_H + +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +void trace_dump_format(enum pipe_format format); + +void trace_dump_block(const struct pipe_format_block *block); + +void trace_dump_template(const struct pipe_texture *templat); + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); + +void trace_dump_viewport_state(const struct pipe_viewport_state *state); + +void trace_dump_scissor_state(const struct pipe_scissor_state *state); + +void trace_dump_clip_state(const struct pipe_clip_state *state); + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); + +void trace_dump_token(const struct tgsi_token *token); + +void trace_dump_shader_state(const struct pipe_shader_state *state); + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); + +void trace_dump_blend_state(const struct pipe_blend_state *state); + +void trace_dump_blend_color(const struct pipe_blend_color *state); + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); + +void trace_dump_sampler_state(const struct pipe_sampler_state *state); + +void trace_dump_surface(const struct pipe_surface *state); + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); + +void trace_dump_vertex_element(const struct pipe_vertex_element *state); + + +#endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c new file mode 100644 index 0000000000..440a78704a --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.c @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_hash_table.h" +#include "util/u_memory.h" + +#include "tr_screen.h" +#include "tr_texture.h" + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + struct trace_texture *tr_tex; + + if(!texture) + goto error; + + assert(texture->screen == tr_scr->screen); + + tr_tex = CALLOC_STRUCT(trace_texture); + if(!tr_tex) + goto error; + + memcpy(&tr_tex->base, texture, sizeof(struct pipe_texture)); + tr_tex->base.screen = &tr_scr->base; + tr_tex->texture = texture; + + return &tr_tex->base; + +error: + pipe_texture_reference(&texture, NULL); + return NULL; +} + + +void +trace_texture_destroy(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + struct trace_texture *tr_tex = trace_texture(tr_scr, texture); + pipe_texture_reference(&tr_tex->texture, NULL); + FREE(tr_tex); +} + + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + struct trace_surface *tr_surf; + + if(!surface) + goto error; + + assert(surface->texture == tr_tex->texture); + + tr_surf = CALLOC_STRUCT(trace_surface); + if(!tr_surf) + goto error; + + memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface)); + + tr_surf->base.winsys = tr_tex->base.screen->winsys; + tr_surf->base.texture = NULL; + pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base); + tr_surf->surface = surface; + + return &tr_surf->base; + +error: + pipe_surface_reference(&surface, NULL); + return NULL; +} + + +void +trace_surface_destroy(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + struct trace_surface *tr_surf = trace_surface(tr_tex, surface); + pipe_texture_reference(&tr_surf->base.texture, NULL); + pipe_surface_reference(&tr_surf->surface, NULL); + FREE(tr_surf); +} + diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h new file mode 100644 index 0000000000..9e72edb8a3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TR_TEXTURE_H_ +#define TR_TEXTURE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "tr_screen.h" + + +struct trace_texture +{ + struct pipe_texture base; + + struct pipe_texture *texture; +}; + + +struct trace_surface +{ + struct pipe_surface base; + + struct pipe_surface *surface; + + void *map; +}; + + +static INLINE struct trace_texture * +trace_texture(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + if(!texture) + return NULL; + assert(texture->screen == &tr_scr->base); + return (struct trace_texture *)texture; +} + + +static INLINE struct trace_surface * +trace_surface(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + if(!surface) + return NULL; + assert(surface->texture == &tr_tex->base); + return (struct trace_surface *)surface; +} + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr, + struct pipe_texture *texture); + +void +trace_texture_destroy(struct trace_screen *tr_scr, + struct pipe_texture *texture); + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex, + struct pipe_surface *surface); + +void +trace_surface_destroy(struct trace_texture *tr_tex, + struct pipe_surface *surface); + + +#endif /* TR_TEXTURE_H_ */ diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c new file mode 100644 index 0000000000..177835854e --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -0,0 +1,497 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "util/u_hash_table.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" +#include "tr_winsys.h" + + +static unsigned trace_buffer_hash(void *buffer) +{ + return (unsigned)(uintptr_t)buffer; +} + + +static int trace_buffer_compare(void *buffer1, void *buffer2) +{ + return (char *)buffer2 - (char *)buffer1; +} + + +static const char * +trace_winsys_get_name(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const char *result; + + trace_dump_call_begin("pipe_winsys", "get_name"); + + trace_dump_arg(ptr, winsys); + + result = winsys->get_name(winsys); + + trace_dump_ret(string, result); + + trace_dump_call_end(); + + return result; +} + + +static void +trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, + struct pipe_surface *surface, + void *context_private) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + + assert(surface); + if(surface->texture) { + struct trace_screen *tr_scr = trace_screen(surface->texture->screen); + struct trace_texture *tr_tex = trace_texture(tr_scr, surface->texture); + struct trace_surface *tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + } + + trace_dump_call_begin("pipe_winsys", "flush_frontbuffer"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + /* XXX: hide, as there is nothing we can do with this + trace_dump_arg(ptr, context_private); + */ + + winsys->flush_frontbuffer(winsys, surface, context_private); + + trace_dump_call_end(); +} + + +static struct pipe_surface * +trace_winsys_surface_alloc(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_surface *result; + + trace_dump_call_begin("pipe_winsys", "surface_alloc"); + + trace_dump_arg(ptr, winsys); + + result = winsys->surface_alloc(winsys); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + assert(!result || !result->texture); + + return result; +} + + +static int +trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, + struct pipe_surface *surface, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + assert(surface && !surface->texture); + + trace_dump_call_begin("pipe_winsys", "surface_alloc_storage"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + trace_dump_arg(format, format); + trace_dump_arg(uint, flags); + trace_dump_arg(uint, tex_usage); + + result = winsys->surface_alloc_storage(winsys, + surface, + width, height, + format, + flags, + tex_usage); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static void +trace_winsys_surface_release(struct pipe_winsys *_winsys, + struct pipe_surface **psurface) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_surface *surface = *psurface; + + assert(psurface && *psurface && !(*psurface)->texture); + + trace_dump_call_begin("pipe_winsys", "surface_release"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + + 
winsys->surface_release(winsys, psurface); + + trace_dump_call_end(); +} + + +static struct pipe_buffer * +trace_winsys_buffer_create(struct pipe_winsys *_winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_buffer *buffer; + + trace_dump_call_begin("pipe_winsys", "buffer_create"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(uint, alignment); + trace_dump_arg(uint, usage); + trace_dump_arg(uint, size); + + buffer = winsys->buffer_create(winsys, alignment, usage, size); + + trace_dump_ret(ptr, buffer); + + trace_dump_call_end(); + + /* Zero the buffer to avoid dumping uninitialized memory */ + if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + void *map; + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + if(map) { + memset(map, 0, buffer->size); + winsys->buffer_unmap(winsys, buffer); + } + } + + return buffer; +} + + +static struct pipe_buffer * +trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, + void *data, + unsigned size) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_buffer *result; + + trace_dump_call_begin("pipe_winsys", "user_buffer_create"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg_begin("data"); + trace_dump_bytes(data, size); + trace_dump_arg_end(); + trace_dump_arg(uint, size); + + result = winsys->user_buffer_create(winsys, data, size); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + /* XXX: Mark the user buffers. (we should wrap pipe_buffers, but is is + * impossible to do so while texture-less surfaces are still around */ + if(result) { + assert(!(result->usage & TRACE_BUFFER_USAGE_USER)); + result->usage |= TRACE_BUFFER_USAGE_USER; + } + + return result; +} + + +void +trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const void *map; + + if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) { + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(map) { + trace_dump_call_begin("pipe_winsys", "buffer_write"); + + trace_dump_arg(ptr, winsys); + + trace_dump_arg(ptr, buffer); + + trace_dump_arg_begin("data"); + trace_dump_bytes(map, buffer->size); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(buffer->size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + winsys->buffer_unmap(winsys, buffer); + } + } +} + + +static void * +trace_winsys_buffer_map(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer, + unsigned usage) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + void *map; + + map = winsys->buffer_map(winsys, buffer, usage); + if(map) { + if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(!hash_table_get(tr_ws->buffer_maps, buffer)); + hash_table_set(tr_ws->buffer_maps, buffer, map); + } + } + + return map; +} + + +static void +trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const void *map; + + map = hash_table_get(tr_ws->buffer_maps, buffer); + if(map) { + trace_dump_call_begin("pipe_winsys", "buffer_write"); + + trace_dump_arg(ptr, winsys); + + trace_dump_arg(ptr, buffer); + + trace_dump_arg_begin("data"); + 
trace_dump_bytes(map, buffer->size); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(buffer->size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + hash_table_remove(tr_ws->buffer_maps, buffer); + } + + winsys->buffer_unmap(winsys, buffer); +} + + +static void +trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin("pipe_winsys", "buffer_destroy"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, buffer); + + winsys->buffer_destroy(winsys, buffer); + + trace_dump_call_end(); +} + + +static void +trace_winsys_fence_reference(struct pipe_winsys *_winsys, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_fence_handle *dst = *pdst; + + trace_dump_call_begin("pipe_winsys", "fence_reference"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, dst); + trace_dump_arg(ptr, src); + + winsys->fence_reference(winsys, pdst, src); + + trace_dump_call_end(); +} + + +static int +trace_winsys_fence_signalled(struct pipe_winsys *_winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin("pipe_winsys", "fence_signalled"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, fence); + trace_dump_arg(uint, flag); + + result = winsys->fence_signalled(winsys, fence, flag); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static int +trace_winsys_fence_finish(struct pipe_winsys *_winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin("pipe_winsys", "fence_finish"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, fence); + trace_dump_arg(uint, flag); + + result = winsys->fence_finish(winsys, fence, flag); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static void +trace_winsys_destroy(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin("pipe_winsys", "destroy"); + + trace_dump_arg(ptr, winsys); + + /* + winsys->destroy(winsys); + */ + + trace_dump_call_end(); + + hash_table_destroy(tr_ws->buffer_maps); + + FREE(tr_ws); +} + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys) +{ + struct trace_winsys *tr_ws; + + if(!winsys) + goto error1; + + tr_ws = CALLOC_STRUCT(trace_winsys); + if(!tr_ws) + goto error1; + + tr_ws->base.destroy = trace_winsys_destroy; + tr_ws->base.get_name = trace_winsys_get_name; + tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; + tr_ws->base.surface_alloc = trace_winsys_surface_alloc; + tr_ws->base.surface_alloc_storage = trace_winsys_surface_alloc_storage; + tr_ws->base.surface_release = trace_winsys_surface_release; + tr_ws->base.buffer_create = trace_winsys_buffer_create; + tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; + tr_ws->base.buffer_map = trace_winsys_buffer_map; + tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap; + tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy; + tr_ws->base.fence_reference = 
trace_winsys_fence_reference; + tr_ws->base.fence_signalled = trace_winsys_fence_signalled; + tr_ws->base.fence_finish = trace_winsys_fence_finish; + + tr_ws->winsys = winsys; + + tr_ws->buffer_maps = hash_table_create(trace_buffer_hash, + trace_buffer_compare); + if(!tr_ws->buffer_maps) + goto error2; + + trace_dump_call_begin("", "pipe_winsys_create"); + trace_dump_ret(ptr, winsys); + trace_dump_call_end(); + + return &tr_ws->base; + +error2: + FREE(tr_ws); +error1: + return winsys; +} diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h new file mode 100644 index 0000000000..062ddf66a0 --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_WINSYS_H_ +#define TR_WINSYS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" + + +/** + * It often happens that new data is written directly to the user buffers + * without mapping/unmapping. This flag marks user buffers, so that their + * contents can be dumpped before being used by the pipe context. + */ +#define TRACE_BUFFER_USAGE_USER (1 << 31) + + +struct hash_table; + + +struct trace_winsys +{ + struct pipe_winsys base; + + struct pipe_winsys *winsys; + + struct hash_table *buffer_maps; +}; + + +static INLINE struct trace_winsys * +trace_winsys(struct pipe_winsys *winsys) +{ + assert(winsys); + return (struct trace_winsys *)winsys; +} + + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys); + + +void +trace_winsys_user_buffer_update(struct pipe_winsys *winsys, + struct pipe_buffer *buffer); + + +#endif /* TR_WINSYS_H_ */ diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl new file mode 100644 index 0000000000..9cd621e7ab --- /dev/null +++ b/src/gallium/drivers/trace/trace.xsl @@ -0,0 +1,185 @@ +<?xml version="1.0"?> + +<!-- + +Copyright 2008 Tungsten Graphics, Inc. + +This program is free software: you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +!--> + +<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + + <xsl:output method="html" /> + + <xsl:strip-space elements="*" /> + + <xsl:template match="/trace"> + <html> + <head> + <title>Gallium Trace</title> + </head> + <style> + body { + font-family: verdana, sans-serif; + font-size: 11px; + font-weight: normal; + text-align : left; + } + + .fun { + font-weight: bold; + } + + .var { + font-style: italic; + } + + .typ { + display: none; + } + + .lit { + color: #0000ff; + } + + .ptr { + color: #008000; + } + </style> + <body> + <ol class="calls"> + <xsl:apply-templates/> + </ol> + </body> + </html> + </xsl:template> + + <xsl:template match="call"> + <li> + <span class="fun"> + <xsl:value-of select="@class"/> + <xsl:text>::</xsl:text> + <xsl:value-of select="@method"/> + </span> + <xsl:text>(</xsl:text> + <xsl:apply-templates select="arg"/> + <xsl:text>)</xsl:text> + <xsl:apply-templates select="ret"/> + </li> + </xsl:template> + + <xsl:template match="arg|member"> + <xsl:apply-templates select="@name"/> + <xsl:text> = </xsl:text> + <xsl:apply-templates /> + <xsl:if test="position() != last()"> + <xsl:text>, </xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="ret"> + <xsl:text> = </xsl:text> + <xsl:apply-templates /> + </xsl:template> + + <xsl:template match="bool|int|uint|float|enum"> + <span class="lit"> + <xsl:value-of select="text()"/> + </span> + </xsl:template> + + <xsl:template match="bytes"> + <span class="lit"> + <xsl:text>...</xsl:text> + </span> + </xsl:template> + + <xsl:template match="string"> + <span class="lit"> + <xsl:text>"</xsl:text> + <xsl:call-template name="break"> + <xsl:with-param name="text" select="text()"/> + </xsl:call-template> + <xsl:text>"</xsl:text> + </span> + </xsl:template> + + <xsl:template match="array|struct"> + <xsl:text>{</xsl:text> + <xsl:apply-templates /> + <xsl:text>}</xsl:text> + </xsl:template> + + <xsl:template match="elem"> + <xsl:apply-templates /> + <xsl:if test="position() != last()"> + <xsl:text>, </xsl:text> + </xsl:if> + </xsl:template> + + <xsl:template match="null"> + <span class="ptr"> + <xsl:text>NULL</xsl:text> + </span> + </xsl:template> + + <xsl:template match="ptr"> + <span class="ptr"> + <xsl:value-of select="text()"/> + </span> + </xsl:template> + + <xsl:template match="@name"> + <span class="var"> + <xsl:value-of select="."/> + </span> + </xsl:template> + + <xsl:template name="break"> + <xsl:param name="text" select="."/> + <xsl:choose> + <xsl:when test="contains($text, '
')"> + <xsl:value-of select="substring-before($text, '
')"/> + <br/> + <xsl:call-template name="break"> + <xsl:with-param name="text" select="substring-after($text, '
')"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$text"/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template name="replace"> + <xsl:param name="text"/> + <xsl:param name="from"/> + <xsl:param name="to"/> + <xsl:choose> + <xsl:when test="contains($text,$from)"> + <xsl:value-of select="concat(substring-before($text,$from),$to)"/> + <xsl:call-template name="replace"> + <xsl:with-param name="text" select="substring-after($text,$from)"/> + <xsl:with-param name="from" select="$from"/> + <xsl:with-param name="to" select="$to"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$text"/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + +</xsl:transform> |
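As a closing illustration, here is a minimal usage sketch (not part of the diff above) of how a winsys might layer this trace driver on top of a real screen. The init_screen and real_driver_create_screen names are hypothetical placeholders; trace_screen_create and the GALLIUM_TRACE variable come from tr_screen.c and tr_dump.c above.

#include "tr_screen.h"   /* include path assumed; header added in this diff */

/* Hypothetical winsys entry point: create the real screen, then wrap it.
 * trace_screen_create() returns a wrapping trace_screen when the
 * GALLIUM_TRACE environment variable names an output file, and returns
 * the original screen unchanged when tracing is disabled or setup fails. */
struct pipe_screen *
init_screen(struct pipe_winsys *winsys)
{
   struct pipe_screen *screen;

   screen = real_driver_create_screen(winsys);   /* hypothetical driver call */
   if (!screen)
      return NULL;

   /* Calls made through the returned screen are dumped as XML; textures
    * and surfaces it creates are wrapped as well.  The resulting trace
    * file can be rendered with the trace.xsl stylesheet above. */
   return trace_screen_create(screen);
}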