From b642730be93149baa7556e5791393168ab396175 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:35:24 +0900 Subject: Code reorganization: move files into their places. This is in a separate commit to ensure renames are properly preserved. --- src/gallium/drivers/cell/Makefile | 12 + src/gallium/drivers/cell/common.h | 220 +++ src/gallium/drivers/cell/ppu/Makefile | 76 + src/gallium/drivers/cell/ppu/cell_batch.c | 217 +++ src/gallium/drivers/cell/ppu/cell_batch.h | 58 + src/gallium/drivers/cell/ppu/cell_clear.c | 76 + src/gallium/drivers/cell/ppu/cell_clear.h | 43 + src/gallium/drivers/cell/ppu/cell_context.c | 287 +++ src/gallium/drivers/cell/ppu/cell_context.h | 135 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 164 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.h | 42 + src/gallium/drivers/cell/ppu/cell_flush.c | 84 + src/gallium/drivers/cell/ppu/cell_flush.h | 38 + src/gallium/drivers/cell/ppu/cell_render.c | 210 +++ src/gallium/drivers/cell/ppu/cell_render.h | 39 + src/gallium/drivers/cell/ppu/cell_spu.c | 155 ++ src/gallium/drivers/cell/ppu/cell_spu.h | 82 + src/gallium/drivers/cell/ppu/cell_state.h | 115 ++ src/gallium/drivers/cell/ppu/cell_state_blend.c | 109 ++ src/gallium/drivers/cell/ppu/cell_state_clip.c | 84 + src/gallium/drivers/cell/ppu/cell_state_derived.c | 192 ++ src/gallium/drivers/cell/ppu/cell_state_emit.c | 103 ++ src/gallium/drivers/cell/ppu/cell_state_emit.h | 36 + src/gallium/drivers/cell/ppu/cell_state_fs.c | 171 ++ .../drivers/cell/ppu/cell_state_rasterizer.c | 106 ++ src/gallium/drivers/cell/ppu/cell_state_sampler.c | 84 + src/gallium/drivers/cell/ppu/cell_state_surface.c | 71 + src/gallium/drivers/cell/ppu/cell_state_vertex.c | 63 + src/gallium/drivers/cell/ppu/cell_surface.c | 179 ++ src/gallium/drivers/cell/ppu/cell_surface.h | 42 + src/gallium/drivers/cell/ppu/cell_texture.c | 252 +++ src/gallium/drivers/cell/ppu/cell_texture.h | 80 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 294 +++ src/gallium/drivers/cell/ppu/cell_vbuf.h | 38 + src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 120 ++ src/gallium/drivers/cell/ppu/cell_winsys.c | 40 + src/gallium/drivers/cell/ppu/cell_winsys.h | 50 + src/gallium/drivers/cell/spu/Makefile | 72 + src/gallium/drivers/cell/spu/spu_blend.c | 62 + src/gallium/drivers/cell/spu/spu_blend.h | 37 + src/gallium/drivers/cell/spu/spu_colorpack.h | 110 ++ src/gallium/drivers/cell/spu/spu_exec.c | 1948 ++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_exec.h | 172 ++ src/gallium/drivers/cell/spu/spu_main.c | 567 ++++++ src/gallium/drivers/cell/spu/spu_main.h | 177 ++ src/gallium/drivers/cell/spu/spu_render.c | 301 +++ src/gallium/drivers/cell/spu/spu_render.h | 38 + src/gallium/drivers/cell/spu/spu_texture.c | 217 +++ src/gallium/drivers/cell/spu/spu_texture.h | 47 + src/gallium/drivers/cell/spu/spu_tile.c | 83 + src/gallium/drivers/cell/spu/spu_tile.h | 73 + src/gallium/drivers/cell/spu/spu_tri.c | 926 ++++++++++ src/gallium/drivers/cell/spu/spu_tri.h | 37 + src/gallium/drivers/cell/spu/spu_util.c | 165 ++ src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 673 +++++++ src/gallium/drivers/cell/spu/spu_vertex_shader.c | 231 +++ src/gallium/drivers/cell/spu/spu_vertex_shader.h | 63 + src/gallium/drivers/cell/spu/spu_ztest.h | 135 ++ 58 files changed, 10301 insertions(+) create mode 100644 src/gallium/drivers/cell/Makefile create mode 100644 src/gallium/drivers/cell/common.h create mode 100644 src/gallium/drivers/cell/ppu/Makefile create mode 100644 src/gallium/drivers/cell/ppu/cell_batch.c create mode 100644 src/gallium/drivers/cell/ppu/cell_batch.h create mode 100644 src/gallium/drivers/cell/ppu/cell_clear.c create mode 100644 src/gallium/drivers/cell/ppu/cell_clear.h create mode 100644 src/gallium/drivers/cell/ppu/cell_context.c create mode 100644 src/gallium/drivers/cell/ppu/cell_context.h create mode 100644 src/gallium/drivers/cell/ppu/cell_draw_arrays.c create mode 100644 src/gallium/drivers/cell/ppu/cell_draw_arrays.h create mode 100644 src/gallium/drivers/cell/ppu/cell_flush.c create mode 100644 src/gallium/drivers/cell/ppu/cell_flush.h create mode 100644 src/gallium/drivers/cell/ppu/cell_render.c create mode 100644 src/gallium/drivers/cell/ppu/cell_render.h create mode 100644 src/gallium/drivers/cell/ppu/cell_spu.c create mode 100644 src/gallium/drivers/cell/ppu/cell_spu.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state_blend.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_clip.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_derived.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_emit.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_emit.h create mode 100644 src/gallium/drivers/cell/ppu/cell_state_fs.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_rasterizer.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_sampler.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_surface.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_vertex.c create mode 100644 src/gallium/drivers/cell/ppu/cell_surface.c create mode 100644 src/gallium/drivers/cell/ppu/cell_surface.h create mode 100644 src/gallium/drivers/cell/ppu/cell_texture.c create mode 100644 src/gallium/drivers/cell/ppu/cell_texture.h create mode 100644 src/gallium/drivers/cell/ppu/cell_vbuf.c create mode 100644 src/gallium/drivers/cell/ppu/cell_vbuf.h create mode 100644 src/gallium/drivers/cell/ppu/cell_vertex_shader.c create mode 100644 src/gallium/drivers/cell/ppu/cell_winsys.c create mode 100644 src/gallium/drivers/cell/ppu/cell_winsys.h create mode 100644 src/gallium/drivers/cell/spu/Makefile create mode 100644 src/gallium/drivers/cell/spu/spu_blend.c create mode 100644 src/gallium/drivers/cell/spu/spu_blend.h create mode 100644 src/gallium/drivers/cell/spu/spu_colorpack.h create mode 100644 src/gallium/drivers/cell/spu/spu_exec.c create mode 100644 src/gallium/drivers/cell/spu/spu_exec.h create mode 100644 src/gallium/drivers/cell/spu/spu_main.c create mode 100644 src/gallium/drivers/cell/spu/spu_main.h create mode 100644 src/gallium/drivers/cell/spu/spu_render.c create mode 100644 src/gallium/drivers/cell/spu/spu_render.h create mode 100644 src/gallium/drivers/cell/spu/spu_texture.c create mode 100644 src/gallium/drivers/cell/spu/spu_texture.h create mode 100644 src/gallium/drivers/cell/spu/spu_tile.c create mode 100644 src/gallium/drivers/cell/spu/spu_tile.h create mode 100644 src/gallium/drivers/cell/spu/spu_tri.c create mode 100644 src/gallium/drivers/cell/spu/spu_tri.h create mode 100644 src/gallium/drivers/cell/spu/spu_util.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_fetch.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_shader.c create mode 100644 src/gallium/drivers/cell/spu/spu_vertex_shader.h create mode 100644 src/gallium/drivers/cell/spu/spu_ztest.h (limited to 'src/gallium/drivers/cell') diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile new file mode 100644 index 0000000000..47aef7b05f --- /dev/null +++ b/src/gallium/drivers/cell/Makefile @@ -0,0 +1,12 @@ +# Cell Gallium driver Makefile + + +default: + ( cd spu ; make ) + ( cd ppu ; make ) + + + +clean: + ( cd spu ; make clean ) + ( cd ppu ; make clean ) diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h new file mode 100644 index 0000000000..4de514c358 --- /dev/null +++ b/src/gallium/drivers/cell/common.h @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Types and tokens which are common to the SPU and PPU code. + */ + + +#ifndef CELL_COMMON_H +#define CELL_COMMON_H + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_format.h" + + +/** The standard assert macro doesn't seem to work reliably */ +#define ASSERT(x) \ + if (!(x)) { \ + ubyte *p = NULL; \ + fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ + __FILE__, __LINE__, __FUNCTION__, #x); \ + *p = 0; \ + exit(1); \ + } + + +/** for sanity checking */ +#define ASSERT_ALIGN16(ptr) \ + ASSERT((((unsigned long) (ptr)) & 0xf) == 0); + + +/** round up value to next multiple of 4 */ +#define ROUNDUP4(k) (((k) + 0x3) & ~0x3) + +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) + +/** round up value to next multiple of 16 */ +#define ROUNDUP16(k) (((k) + 0xf) & ~0xf) + + +#define CELL_MAX_SPUS 6 + +#define TILE_SIZE 32 + + +/** + * The low byte of a mailbox word contains the command opcode. + * Remaining higher bytes are command specific. + */ +#define CELL_CMD_OPCODE_MASK 0xff + +#define CELL_CMD_EXIT 1 +#define CELL_CMD_CLEAR_SURFACE 2 +#define CELL_CMD_FINISH 3 +#define CELL_CMD_RENDER 4 +#define CELL_CMD_BATCH 5 +#define CELL_CMD_RELEASE_VERTS 6 +#define CELL_CMD_STATE_FRAMEBUFFER 10 +#define CELL_CMD_STATE_DEPTH_STENCIL 11 +#define CELL_CMD_STATE_SAMPLER 12 +#define CELL_CMD_STATE_TEXTURE 13 +#define CELL_CMD_STATE_VERTEX_INFO 14 +#define CELL_CMD_STATE_VIEWPORT 15 +#define CELL_CMD_STATE_VS_ARRAY_INFO 16 +#define CELL_CMD_STATE_BLEND 17 +#define CELL_CMD_VS_EXECUTE 18 + + +#define CELL_NUM_BUFFERS 4 +#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ + +#define CELL_BUFFER_STATUS_FREE 10 +#define CELL_BUFFER_STATUS_USED 20 + + + +/** + * Tell SPUs about the framebuffer size, location + */ +struct cell_command_framebuffer +{ + uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + int width, height; + void *color_start, *depth_start; + enum pipe_format color_format, depth_format; +}; + + +/** + * Clear framebuffer to the given value/color. + */ +struct cell_command_clear_surface +{ + uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ + uint surface; /**< Temporary: 0=color, 1=Z */ + uint value; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint format; /**< Pipe format of each entry. */ +} ALIGN16_ATTRIB; + + +struct cell_shader_info +{ + unsigned num_outputs; + + uint64_t declarations; + unsigned num_declarations; + uint64_t instructions; + unsigned num_instructions; + uint64_t uniforms; + uint64_t immediates; + unsigned num_immediates; +} ALIGN16_ATTRIB; + + +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ + uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ + struct cell_shader_info shader; + unsigned num_elts; + unsigned elts[SPU_VERTS_PER_BATCH]; + uint64_t vOut[SPU_VERTS_PER_BATCH]; + float plane[12][4]; + unsigned nr_planes; + unsigned nr_attrs; +} ALIGN16_ATTRIB; + + +struct cell_command_render +{ + uint64_t opcode; /**< CELL_CMD_RENDER */ + uint prim_type; /**< PIPE_PRIM_x */ + uint num_verts; + uint vertex_size; /**< bytes per vertex */ + uint num_indexes; + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ + float xmin, ymin, xmax, ymax; /* XXX another dummy field */ + uint min_index; + boolean inline_verts; +}; + + +struct cell_command_release_verts +{ + uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +struct cell_command_texture +{ + void *start; /**< Address in main memory */ + uint width, height; +}; + + +/** XXX unions don't seem to work */ +/* XXX this should go away; all commands should be placed in batch buffers */ +struct cell_command +{ +#if 0 + struct cell_command_framebuffer fb; + struct cell_command_clear_surface clear; + struct cell_command_render render; +#endif + struct cell_command_vs vs; +} ALIGN16_ATTRIB; + + +/** This is the object passed to spe_create_thread() */ +struct cell_init_info +{ + unsigned id; + unsigned num_spus; + struct cell_command *cmd; + + /** Buffers for command batches, vertex/index data */ + ubyte *buffers[CELL_NUM_BUFFERS]; + uint *buffer_status; /**< points at cell_context->buffer_status */ +} ALIGN16_ATTRIB; + + +#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile new file mode 100644 index 0000000000..50060f5cd3 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -0,0 +1,76 @@ +# Gallium3D Cell driver: PPU code + +# This makefile builds the g3dcell.a library which gets pulled into +# the main libGL.so library + + +TOP = ../../../../.. +include $(TOP)/configs/linux-cell + + +#PROG = gl4 + +CELL_LIB = libcell.a + +SPU_CODE_MODULE = ../spu/g3d_spu.a + + +SOURCES = \ + cell_batch.c \ + cell_clear.c \ + cell_context.c \ + cell_draw_arrays.c \ + cell_flush.c \ + cell_state_blend.c \ + cell_state_clip.c \ + cell_state_derived.c \ + cell_state_emit.c \ + cell_state_fs.c \ + cell_state_rasterizer.c \ + cell_state_sampler.c \ + cell_state_surface.c \ + cell_state_vertex.c \ + cell_spu.c \ + cell_surface.c \ + cell_texture.c \ + cell_vbuf.c \ + cell_vertex_shader.c \ + cell_winsys.c + + +OBJECTS = $(SOURCES:.c=.o) \ + +INCLUDE_DIRS = -I$(TOP)/src/mesa + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + + +default: $(CELL_LIB) + + +$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) + ar -ru $(CELL_LIB) $(OBJECTS) + +#$(PROG): $(PPU_OBJECTS) +# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) + + + +clean: + rm -f *.o *~ $(CELL_LIB) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + + + diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c new file mode 100644 index 0000000000..f45e5f25b6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_spu.h" + + + +uint +cell_get_empty_buffer(struct cell_context *cell) +{ + uint buf = 0, tries = 0; + + /* Find a buffer that's marked as free by all SPUs */ + while (1) { + uint spu, num_free = 0; + + for (spu = 0; spu < cell->num_spus; spu++) { + if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { + num_free++; + + if (num_free == cell->num_spus) { + /* found a free buffer, now mark status as used */ + for (spu = 0; spu < cell->num_spus; spu++) { + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + } + /* + printf("PPU: ALLOC BUFFER %u\n", buf); + */ + return buf; + } + } + else { + break; + } + } + + /* try next buf */ + buf = (buf + 1) % CELL_NUM_BUFFERS; + + tries++; + if (tries == 100) { + /* + printf("PPU WAITING for buffer...\n"); + */ + } + } +} + + +void +cell_batch_flush(struct cell_context *cell) +{ + static boolean flushing = FALSE; + uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + uint spu, cmd_word; + + assert(!flushing); + + if (size == 0) + return; + + flushing = TRUE; + + assert(batch < CELL_NUM_BUFFERS); + + /* + printf("cell_batch_dispatch: buf %u at %p, size %u\n", + batch, &cell->batch_buffer[batch][0], size); + */ + + /* + * Build "BATCH" command and sent to all SPUs. + */ + cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); + + for (spu = 0; spu < cell->num_spus; spu++) { + assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); + send_mbox_message(cell_global.spe_contexts[spu], cmd_word); + } + + /* When the SPUs are done copying the buffer into their locals stores + * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] + * array indicating that the PPU can re-use the buffer. + */ + + batch = cell_get_empty_buffer(cell); + + cell->buffer_size[batch] = 0; /* empty */ + cell->cur_batch = batch; + + flushing = FALSE; +} + + +uint +cell_batch_free_space(const struct cell_context *cell) +{ + uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + return free; +} + + +/** + * Append data to current batch. + */ +void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes) +{ + uint size; + + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + if (size + bytes > CELL_BUFFER_SIZE) { + cell_batch_flush(cell); + size = 0; + } + + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + memcpy(cell->buffer[cell->cur_batch] + size, data, bytes); + + cell->buffer_size[cell->cur_batch] = size + bytes; +} + + +void * +cell_batch_alloc(struct cell_context *cell, uint bytes) +{ + return cell_batch_alloc_aligned(cell, bytes, 1); +} + + +void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment) +{ + void *pos; + uint size, padbytes; + + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(alignment > 0); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif + + size = cell->buffer_size[cell->cur_batch]; + + padbytes = (alignment - (size % alignment)) % alignment; + + if (padbytes + size + bytes > CELL_BUFFER_SIZE) { + cell_batch_flush(cell); + size = 0; + } + else { + size += padbytes; + } + + ASSERT(size % alignment == 0); + ASSERT(size + bytes <= CELL_BUFFER_SIZE); + + pos = (void *) (cell->buffer[cell->cur_batch] + size); + + cell->buffer_size[cell->cur_batch] = size + bytes; + + return pos; +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h new file mode 100644 index 0000000000..a6eee0a8b1 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_BATCH_H +#define CELL_BATCH_H + +#include "pipe/p_compiler.h" + + +struct cell_context; + + +extern uint +cell_get_empty_buffer(struct cell_context *cell); + +extern void +cell_batch_flush(struct cell_context *cell); + +extern uint +cell_batch_free_space(const struct cell_context *cell); + +extern void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes); + +extern void * +cell_batch_alloc(struct cell_context *cell, uint bytes); + +extern void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment); + + +#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c new file mode 100644 index 0000000000..07b908eec5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + +#include +#include +#include +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/cell/common.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" + + +void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct cell_context *cell = cell_context(pipe); + uint surfIndex; + + if (cell->dirty) + cell_update_derived(cell); + + + if (!cell->cbuf_map[0]) + cell->cbuf_map[0] = pipe_surface_map(ps); + + if (ps == cell->framebuffer.zsbuf) { + surfIndex = 1; + } + else { + surfIndex = 0; + } + + + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) + cell_batch_alloc(cell, sizeof(*clr)); + clr->opcode = CELL_CMD_CLEAR_SURFACE; + clr->surface = surfIndex; + clr->value = clearValue; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h new file mode 100644 index 0000000000..ff47d43f4c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CLEAR_H +#define CELL_CLEAR_H + + +struct pipe_context; +struct pipe_surface; + + +extern void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + + +#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c new file mode 100644 index 0000000000..bbe1fd7a11 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -0,0 +1,287 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_flush.h" +#include "cell_render.h" +#include "cell_state.h" +#include "cell_surface.h" +#include "cell_spu.h" +#include "cell_texture.h" +#include "cell_vbuf.h" + + + +static boolean +cell_is_format_supported( struct pipe_context *pipe, + enum pipe_format format, uint type ) +{ + /*struct cell_context *cell = cell_context( pipe );*/ + + switch (type) { + case PIPE_TEXTURE: + /* cell supports all texture formats, XXX for now anyway */ + return TRUE; + case PIPE_SURFACE: + /* cell supports all (off-screen) surface formats, XXX for now */ + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static int cell_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + +static float cell_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + + default: + return 0; + } +} + + +static const char * +cell_get_name( struct pipe_context *pipe ) +{ + return "Cell"; +} + +static const char * +cell_get_vendor( struct pipe_context *pipe ) +{ + return "Tungsten Graphics, Inc."; +} + + + +static void +cell_destroy_context( struct pipe_context *pipe ) +{ + struct cell_context *cell = cell_context(pipe); + + cell_spu_exit(cell); + + align_free(cell); +} + + +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ + struct draw_context *draw = draw_create(); + + if (getenv("GALLIUM_CELL_VS")) { + /* plug in SPU-based vertex transformation code */ + draw->shader_queue_flush = cell_vertex_shader_queue_flush; + draw->driver_private = cell; + } + + return draw; +} + + +struct pipe_context * +cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) +{ + struct cell_context *cell; + uint spu, buf; + + /* some fields need to be 16-byte aligned, so align the whole object */ + cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); + if (!cell) + return NULL; + + memset(cell, 0, sizeof(*cell)); + + cell->winsys = cws; + cell->pipe.winsys = winsys; + cell->pipe.destroy = cell_destroy_context; + + /* queries */ + cell->pipe.is_format_supported = cell_is_format_supported; + cell->pipe.get_name = cell_get_name; + cell->pipe.get_vendor = cell_get_vendor; + cell->pipe.get_param = cell_get_param; + cell->pipe.get_paramf = cell_get_paramf; + + + /* state setters */ + cell->pipe.create_blend_state = cell_create_blend_state; + cell->pipe.bind_blend_state = cell_bind_blend_state; + cell->pipe.delete_blend_state = cell_delete_blend_state; + + cell->pipe.create_sampler_state = cell_create_sampler_state; + cell->pipe.bind_sampler_state = cell_bind_sampler_state; + cell->pipe.delete_sampler_state = cell_delete_sampler_state; + + cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; + cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; + cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + + cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; + cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; + cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_blend_color = cell_set_blend_color; + cell->pipe.set_clip_state = cell_set_clip_state; + cell->pipe.set_constant_buffer = cell_set_constant_buffer; + + cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + + cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; + cell->pipe.set_scissor_state = cell_set_scissor_state; + cell->pipe.set_viewport_state = cell_set_viewport_state; + + cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; + cell->pipe.set_vertex_element = cell_set_vertex_element; + + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + + cell->pipe.clear = cell_clear_surface; + cell->pipe.flush = cell_flush; + + /* textures */ + cell->pipe.texture_create = cell_texture_create; + cell->pipe.texture_release = cell_texture_release; + cell->pipe.get_tex_surface = cell_get_tex_surface; + + cell->pipe.set_sampler_texture = cell_set_sampler_texture; + +#if 0 + cell->pipe.begin_query = cell_begin_query; + cell->pipe.end_query = cell_end_query; + cell->pipe.wait_query = cell_wait_query; +#endif + + cell_init_surface_functions(cell); + + cell->draw = cell_draw_create(cell); + + cell_init_vbuf(cell); + draw_set_rasterize_stage(cell->draw, cell->vbuf); + + /* + * SPU stuff + */ + cell->num_spus = 6; + + cell_start_spus(cell); + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } + + return &cell->pipe; +} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h new file mode 100644 index 0000000000..3b63419b5e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CONTEXT_H +#define CELL_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/draw/draw_vbuf.h" +#include "cell_winsys.h" +#include "pipe/cell/common.h" + + +struct cell_vbuf_render; + +struct cell_vertex_shader_state +{ + struct pipe_shader_state shader; + void *draw_data; +}; + + +struct cell_fragment_shader_state +{ + struct pipe_shader_state shader; + void *data; +}; + + +struct cell_context +{ + struct pipe_context pipe; + + struct cell_winsys *winsys; + + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct cell_vertex_shader_state *vs; + const struct cell_fragment_shader_state *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + + ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; + ubyte *zsbuf_map; + + struct pipe_surface *tex_surf; + uint *tex_map; + + uint dirty; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *render_stage; + + /** For post-transformed vertex buffering: */ + struct cell_vbuf_render *vbuf_render; + struct draw_stage *vbuf; + + struct vertex_info vertex_info; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + + uint num_spus; + + /** Buffers for command batches, vertex/index data */ + uint buffer_size[CELL_NUM_BUFFERS]; + ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + + int cur_batch; /**< which buffer is being filled w/ commands */ + + /** [4] to ensure 16-byte alignment for each status word */ + uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + +}; + + + + +static INLINE struct cell_context * +cell_context(struct pipe_context *pipe) +{ + return (struct cell_context *) pipe; +} + + +extern struct pipe_context * +cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws); + +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); + + + + +#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c new file mode 100644 index 0000000000..717cd8370f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_state.h" + +#include "pipe/draw/draw_context.h" + + + +static void +cell_map_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX]); +} + +static void +cell_unmap_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *sp = cell_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + /* first, check that the primitive is not malformed. It is the + * state tracker's responsibility to do send only correctly formed + * primitives down. It currently isn't doing that though... + */ +#if 1 + count = draw_trim_prim( mode, count ); +#else + if (!draw_validate_prim( mode, count )) + assert(0); +#endif + + if (sp->dirty) + cell_update_derived( sp ); + +#if 0 + cell_map_surfaces(sp); +#endif + cell_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + void *buf = pipe->winsys->buffer_map(pipe->winsys, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! */ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + if (sp->vertex_buffer[i].buffer) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + } + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + } + + /* Note: leave drawing surfaces mapped */ + cell_unmap_constant_buffers(sp); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h new file mode 100644 index 0000000000..d5df4aa05f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_DRAW_ARRAYS_H +#define CELL_DRAW_ARRAYS_H + + +boolean cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + + +#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c new file mode 100644 index 0000000000..f62bc4650c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_render.h" +#include "pipe/draw/draw_context.h" + + +void +cell_flush(struct pipe_context *pipe, unsigned flags) +{ + struct cell_context *cell = cell_context(pipe); + + if (flags & PIPE_FLUSH_SWAPBUFFERS) + flags |= PIPE_FLUSH_WAIT; + + draw_flush( cell->draw ); + cell_flush_int(pipe, flags); +} + + +/** internal flush */ +void +cell_flush_int(struct pipe_context *pipe, unsigned flags) +{ + static boolean flushing = FALSE; /* recursion catcher */ + struct cell_context *cell = cell_context(pipe); + uint i; + + ASSERT(!flushing); + flushing = TRUE; + + if (flags & PIPE_FLUSH_WAIT) { + uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); + *cmd = CELL_CMD_FINISH; + } + + cell_batch_flush(cell); + +#if 0 + /* Send CMD_FINISH to all SPUs */ + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); + } +#endif + + if (flags & PIPE_FLUSH_WAIT) { + /* Wait for ack */ + for (i = 0; i < cell->num_spus; i++) { + uint k = wait_mbox_message(cell_global.spe_contexts[i]); + assert(k == CELL_CMD_FINISH); + } + } + + flushing = FALSE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h new file mode 100644 index 0000000000..eda351b1cb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FLUSH +#define CELL_FLUSH + +extern void +cell_flush(struct pipe_context *pipe, unsigned flags); + +extern void +cell_flush_int(struct pipe_context *pipe, unsigned flags); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c new file mode 100644 index 0000000000..4ab277a4b2 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Last stage of 'draw' pipeline: send tris to SPUs. + * \author Brian Paul + */ + +#include "cell_context.h" +#include "cell_render.h" +#include "cell_spu.h" +#include "pipe/p_util.h" +#include "pipe/draw/draw_private.h" + + +struct render_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct cell_context *cell; +}; + + +static INLINE struct render_stage * +render_stage(struct draw_stage *stage) +{ + return (struct render_stage *) stage; +} + + +static void render_begin( struct draw_stage *stage ) +{ +#if 0 + struct render_stage *render = render_stage(stage); + struct cell_context *sp = render->cell; + const struct pipe_shader_state *fs = &render->cell->fs->shader; + render->quad.nr_attrs = render->cell->nr_frag_attrs; + + render->firstFpInput = fs->input_semantic_name[0]; + + sp->quad.first->begin(sp->quad.first); +#endif +} + + +static void render_end( struct draw_stage *stage ) +{ +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ + struct render_stage *render = render_stage(stage); + /*render->cell->line_stipple_counter = 0;*/ +} + + +static void +render_point(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +static void +render_line(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +/** Write a vertex into the prim buffer */ +static void +save_vertex(struct cell_prim_buffer *buf, uint pos, + const struct vertex_header *vert) +{ + uint attr, j; + + for (attr = 0; attr < 2; attr++) { + for (j = 0; j < 4; j++) { + buf->vertex[pos][attr][j] = vert->data[attr][j]; + } + } + + /* update bounding box */ + if (vert->data[0][0] < buf->xmin) + buf->xmin = vert->data[0][0]; + if (vert->data[0][0] > buf->xmax) + buf->xmax = vert->data[0][0]; + if (vert->data[0][1] < buf->ymin) + buf->ymin = vert->data[0][1]; + if (vert->data[0][1] > buf->ymax) + buf->ymax = vert->data[0][1]; +} + + +static void +render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + struct render_stage *rs = render_stage(stage); + struct cell_context *cell = rs->cell; + struct cell_prim_buffer *buf = &cell->prim_buffer; + uint i; + + if (buf->num_verts + 3 > CELL_MAX_VERTS) { + cell_flush_prim_buffer(cell); + } + + i = buf->num_verts; + assert(i+2 <= CELL_MAX_VERTS); + save_vertex(buf, i+0, prim->v[0]); + save_vertex(buf, i+1, prim->v[1]); + save_vertex(buf, i+2, prim->v[2]); + buf->num_verts += 3; +} + + +/** + * Send the a RENDER command to all SPUs to have them render the prims + * in the current prim_buffer. + */ +void +cell_flush_prim_buffer(struct cell_context *cell) +{ + uint i; + + if (cell->prim_buffer.num_verts == 0) + return; + + for (i = 0; i < cell->num_spus; i++) { + struct cell_command_render *render = &cell_global.command[i].render; + render->prim_type = PIPE_PRIM_TRIANGLES; + render->num_verts = cell->prim_buffer.num_verts; + render->vertex_size = cell->vertex_info->size * 4; + render->xmin = cell->prim_buffer.xmin; + render->ymin = cell->prim_buffer.ymin; + render->xmax = cell->prim_buffer.xmax; + render->ymax = cell->prim_buffer.ymax; + render->vertex_data = &cell->prim_buffer.vertex; + ASSERT_ALIGN16(render->vertex_data); + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); + } + + cell->prim_buffer.num_verts = 0; + + cell->prim_buffer.xmin = 1e100; + cell->prim_buffer.ymin = 1e100; + cell->prim_buffer.xmax = -1e100; + cell->prim_buffer.ymax = -1e100; + + /* XXX temporary, need to double-buffer the prim buffer until we get + * a real command buffer/list system. + */ + cell_flush(&cell->pipe, 0x0); +} + + + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new draw/render stage. This will be plugged into the + * draw module as the last pipeline stage. + */ +struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) +{ + struct render_stage *render = CALLOC_STRUCT(render_stage); + + render->cell = cell; + render->stage.draw = cell->draw; + render->stage.begin = render_begin; + render->stage.point = render_point; + render->stage.line = render_line; + render->stage.tri = render_tri; + render->stage.end = render_end; + render->stage.reset_stipple_counter = reset_stipple_counter; + render->stage.destroy = render_destroy; + + /* + render->quad.coef = render->coef; + render->quad.posCoef = &render->posCoef; + */ + + return &render->stage; +} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h new file mode 100644 index 0000000000..826dcbafeb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_RENDER_H +#define CELL_RENDER_H + +struct cell_context; +struct draw_stage; + +extern void +cell_flush_prim_buffer(struct cell_context *cell); + +extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); + +#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c new file mode 100644 index 0000000000..7c83a47e57 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -0,0 +1,155 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include + +#include "cell_spu.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/cell/common.h" + + +/* +helpful headers: +/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h +*/ + + +struct cell_global_info cell_global; + + +/** + * Write a 1-word message to the given SPE mailbox. + */ +void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) +{ + spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); +} + + +/** + * Wait for a 1-word message to arrive in given mailbox. + */ +uint +wait_mbox_message(spe_context_ptr_t ctx) +{ + do { + unsigned data; + int count = spe_out_mbox_read(ctx, &data, 1); + + if (count == 1) { + return data; + } + + if (count < 0) { + /* error */ ; + } + } while (1); +} + + +static void *cell_thread_function(void *arg) +{ + struct cell_init_info *init = (struct cell_init_info *) arg; + unsigned entry = SPE_DEFAULT_ENTRY; + + ASSERT_ALIGN16(init); + + if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, + init, NULL, NULL) < 0) { + fprintf(stderr, "spe_context_run() failed\n"); + exit(1); + } + + pthread_exit(NULL); +} + + +/** + * Create the SPU threads + */ +void +cell_start_spus(struct cell_context *cell) +{ + uint i, j; + + assert(cell->num_spus <= MAX_SPUS); + + ASSERT_ALIGN16(&cell_global.command[0]); + ASSERT_ALIGN16(&cell_global.command[1]); + + ASSERT_ALIGN16(&cell_global.inits[0]); + ASSERT_ALIGN16(&cell_global.inits[1]); + + for (i = 0; i < cell->num_spus; i++) { + cell_global.inits[i].id = i; + cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].cmd = &cell_global.command[i]; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { + cell_global.inits[i].buffers[j] = cell->buffer[j]; + } + cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + + cell_global.spe_contexts[i] = spe_context_create(0, NULL); + if (!cell_global.spe_contexts[i]) { + fprintf(stderr, "spe_context_create() failed\n"); + exit(1); + } + + if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { + fprintf(stderr, "spe_program_load() failed\n"); + exit(1); + } + + pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function, + &cell_global.inits[i]); + } +} + + +/** + * Tell all the SPUs to stop/exit. + */ +void +cell_spu_exit(struct cell_context *cell) +{ + uint i; + + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); + } + + /* wait for threads to exit */ + for (i = 0; i < cell->num_spus; i++) { + void *value; + pthread_join(cell_global.spe_threads[i], &value); + cell_global.spe_threads[i] = 0; + cell_global.spe_contexts[i] = 0; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h new file mode 100644 index 0000000000..19eff94f96 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_SPU +#define CELL_SPU + + +#include +#include +#include "pipe/cell/common.h" + +#include "cell_context.h" + + +#define MAX_SPUS 8 + +/** + * Global vars, for now anyway. + */ +struct cell_global_info +{ + /** + * SPU/SPE handles, etc + */ + spe_context_ptr_t spe_contexts[MAX_SPUS]; + pthread_t spe_threads[MAX_SPUS]; + + /** + * Data sent to SPUs + */ + struct cell_init_info inits[MAX_SPUS]; + struct cell_command command[MAX_SPUS]; +}; + + +extern struct cell_global_info cell_global; + + +/** This is the handle for the actual SPE code */ +extern spe_program_handle_t g3d_spu; + + +extern void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); + +extern uint +wait_mbox_message(spe_context_ptr_t ctx); + + +extern void +cell_start_spus(struct cell_context *cell); + + +extern void +cell_spu_exit(struct cell_context *cell); + + +#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h new file mode 100644 index 0000000000..3a71ba14fa --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -0,0 +1,115 @@ + + +#ifndef CELL_STATE_H +#define CELL_STATE_H + + +#define CELL_NEW_VIEWPORT 0x1 +#define CELL_NEW_RASTERIZER 0x2 +#define CELL_NEW_FS 0x4 +#define CELL_NEW_BLEND 0x8 +#define CELL_NEW_CLIP 0x10 +#define CELL_NEW_SCISSOR 0x20 +#define CELL_NEW_STIPPLE 0x40 +#define CELL_NEW_FRAMEBUFFER 0x80 +#define CELL_NEW_ALPHA_TEST 0x100 +#define CELL_NEW_DEPTH_STENCIL 0x200 +#define CELL_NEW_SAMPLER 0x400 +#define CELL_NEW_TEXTURE 0x800 +#define CELL_NEW_VERTEX 0x1000 +#define CELL_NEW_VS 0x2000 +#define CELL_NEW_CONSTANTS 0x4000 +#define CELL_NEW_VERTEX_INFO 0x8000 + + + +extern void +cell_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + + + +extern void * +cell_create_blend_state(struct pipe_context *, const struct pipe_blend_state *); +extern void cell_bind_blend_state(struct pipe_context *, void *); +extern void cell_delete_blend_state(struct pipe_context *, void *); + +extern void cell_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + + +void * +cell_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); + +extern void +cell_bind_sampler_state(struct pipe_context *, unsigned, void *); + +extern void +cell_delete_sampler_state(struct pipe_context *, void *); + + +extern void * +cell_create_depth_stencil_alpha_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); + +extern void +cell_bind_depth_stencil_alpha_state(struct pipe_context *, void *); + +extern void +cell_delete_depth_stencil_alpha_state(struct pipe_context *, void *); + + +void *cell_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void cell_bind_fs_state(struct pipe_context *, void *); +void cell_delete_fs_state(struct pipe_context *, void *); +void *cell_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void cell_bind_vs_state(struct pipe_context *, void *); +void cell_delete_vs_state(struct pipe_context *, void *); + + +void * +cell_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void cell_bind_rasterizer_state(struct pipe_context *, void *); +void cell_delete_rasterizer_state(struct pipe_context *, void *); + + +void cell_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void cell_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture); + +void cell_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void cell_set_texture_state( struct pipe_context *, + unsigned unit, struct pipe_texture * ); + +void cell_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void cell_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + +void cell_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + + +void cell_update_derived( struct cell_context *softpipe ); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c new file mode 100644 index 0000000000..4fc60548c8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" + + + +void * +cell_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + + +void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend = (const struct pipe_blend_state *)blend; + + cell->dirty |= CELL_NEW_BLEND; +} + + +void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + + +void +cell_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->blend_color = *blend_color; + + cell->dirty |= CELL_NEW_BLEND; +} + + + + +void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + + +void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->depth_stencil + = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + + cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c new file mode 100644 index 0000000000..4f43665941 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "cell_context.h" +#include "cell_state.h" +#include "pipe/draw/draw_context.h" + + +void cell_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +void cell_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct cell_context *cell = cell_context(pipe); + + cell->viewport = *viewport; /* struct copy */ + cell->dirty |= CELL_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(cell->draw, viewport); + + /* Using tnl/ and vf/ modules is temporary while getting started. + * Full pipe will have vertex shader, vertex fetch of its own. + */ +} + + +void cell_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->scissor, scissor, sizeof(*scissor) ); + cell->dirty |= CELL_NEW_SCISSOR; +} + + +void cell_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct cell_context *cell = cell_context(pipe); + + memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); + cell->dirty |= CELL_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c new file mode 100644 index 0000000000..56daf5dfde --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -0,0 +1,192 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_vertex.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_state.h" +#include "cell_state_emit.h" + + +static int +find_vs_output(const struct pipe_shader_state *vs, + uint semantic_name, + uint semantic_index) +{ + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + return -1; +} + + +/** + * Determine how to map vertex program outputs to fragment program inputs. + * Basically, this will be used when computing the triangle interpolation + * coefficients from the post-transform vertex attributes. + */ +static void +calculate_vertex_layout( struct cell_context *cell ) +{ + const struct pipe_shader_state *vs = &cell->vs->shader; + const struct pipe_shader_state *fs = &cell->fs->shader; + const enum interp_mode colorInterp + = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo = &cell->vertex_info; + uint i; + int src; + +#if 0 + if (cell->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; + } +#endif + + /* reset vinfo */ + vinfo->num_attribs = 0; + + /* we always want to emit vertex pos */ + src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + for (i = 0; i < fs->num_inputs; i++) { + switch (fs->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + /* already done above */ + break; + + case TGSI_SEMANTIC_COLOR: + src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + fs->input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); +#if 1 + if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ + src = 0; +#endif + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + fs->input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + /* XXX only signal this if format really changes */ + cell->dirty |= CELL_NEW_VERTEX_INFO; +} + + +#if 0 +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct cell_context *sp) +{ + unsigned surfWidth, surfHeight; + + if (sp->framebuffer.num_cbufs > 0) { + surfWidth = sp->framebuffer.cbufs[0]->width; + surfHeight = sp->framebuffer.cbufs[0]->height; + } + else { + /* no surface? */ + surfWidth = sp->scissor.maxx; + surfHeight = sp->scissor.maxy; + } + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} +#endif + + + +void cell_update_derived( struct cell_context *cell ) +{ + if (cell->dirty & (CELL_NEW_RASTERIZER | + CELL_NEW_FS | + CELL_NEW_VS)) + calculate_vertex_layout( cell ); + +#if 0 + if (cell->dirty & (CELL_NEW_SCISSOR | + CELL_NEW_DEPTH_STENCIL_ALPHA | + CELL_NEW_FRAMEBUFFER)) + compute_cliprect(cell); +#endif + + cell_emit_state(cell); + + cell->dirty = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c new file mode 100644 index 0000000000..5d2a786449 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_state_emit.h" +#include "cell_batch.h" +#include "cell_texture.h" + + +static void +emit_state_cmd(struct cell_context *cell, uint cmd, + const void *state, uint state_size) +{ + uint64_t *dst = (uint64_t *) + cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); + *dst = cmd; + memcpy(dst + 1, state, state_size); +} + + + +void +cell_emit_state(struct cell_context *cell) +{ + if (cell->dirty & CELL_NEW_FRAMEBUFFER) { + struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; + struct pipe_surface *zbuf = cell->framebuffer.zsbuf; + struct cell_command_framebuffer *fb + = cell_batch_alloc(cell, sizeof(*fb)); + fb->opcode = CELL_CMD_STATE_FRAMEBUFFER; + fb->color_start = cell->cbuf_map[0]; + fb->color_format = cbuf->format; + fb->depth_start = cell->zsbuf_map; + fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; + fb->width = cell->framebuffer.cbufs[0]->width; + fb->height = cell->framebuffer.cbufs[0]->height; + } + + if (cell->dirty & CELL_NEW_BLEND) { + emit_state_cmd(cell, CELL_CMD_STATE_BLEND, + cell->blend, + sizeof(struct pipe_blend_state)); + } + + if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, + cell->depth_stencil, + sizeof(struct pipe_depth_stencil_alpha_state)); + } + + if (cell->dirty & CELL_NEW_SAMPLER) { + emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, + cell->sampler[0], sizeof(struct pipe_sampler_state)); + } + + if (cell->dirty & CELL_NEW_TEXTURE) { + struct cell_command_texture texture; + if (cell->texture[0]) { + texture.start = cell->texture[0]->tiled_data; + texture.width = cell->texture[0]->base.width[0]; + texture.height = cell->texture[0]->base.height[0]; + } + else { + texture.start = NULL; + texture.width = 0; + texture.height = 0; + } + + emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE, + &texture, sizeof(struct cell_command_texture)); + } + + if (cell->dirty & CELL_NEW_VERTEX_INFO) { + emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, + &cell->vertex_info, sizeof(struct vertex_info)); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h new file mode 100644 index 0000000000..59f8affe8d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_STATE_EMIT_H +#define CELL_STATE_EMIT_H + + +extern void +cell_emit_state(struct cell_context *cell); + + +#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c new file mode 100644 index 0000000000..3f46a87d18 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -0,0 +1,171 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/draw/draw_context.h" +#if 0 +#include "pipe/p_shader_tokens.h" +#include "pipe/llvm/gallivm.h" +#include "pipe/tgsi/util/tgsi_dump.h" +#include "pipe/tgsi/exec/tgsi_sse2.h" +#endif + +#include "cell_context.h" +#include "cell_state.h" + + +void * +cell_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + /*struct cell_context *cell = cell_context(pipe);*/ + struct cell_fragment_shader_state *state; + + state = CALLOC_STRUCT(cell_fragment_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + +#if 0 + if (cell->dump_fs) { + tgsi_dump(state->shader.tokens, 0); + } + +#if defined(__i386__) || defined(__386__) + if (cell->use_sse) { + x86_init_func( &state->sse2_program ); + tgsi_emit_sse2_fs( state->shader.tokens, &state->sse2_program ); + } +#endif + +#ifdef MESA_LLVM + state->llvm_prog = 0; + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif +#endif + + return state; +} + + +void +cell_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->fs = (struct cell_fragment_shader_state *) fs; + + cell->dirty |= CELL_NEW_FS; +} + + +void +cell_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct cell_fragment_shader_state *state = + (struct cell_fragment_shader_state *) fs; + + FREE( state ); +} + + +void * +cell_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *state; + + state = CALLOC_STRUCT(cell_vertex_shader_state); + if (!state) + return NULL; + + state->shader = *templ; + + state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->vs = (const struct cell_vertex_shader_state *) vs; + + draw_bind_vertex_shader(cell->draw, cell->vs->draw_data); + + cell->dirty |= CELL_NEW_VS; +} + + +void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + struct cell_vertex_shader_state *state = + (struct cell_vertex_shader_state *) vs; + + draw_delete_vertex_shader(cell->draw, state->draw_data); + FREE( state ); +} + + +void +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + pipe_buffer_reference(ws, + &cell->constants[shader].buffer, + buf->buffer); + cell->constants[shader].size = buf->size; + + cell->dirty |= CELL_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c new file mode 100644 index 0000000000..d8128ece54 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" + + + +struct spu_rasterizer_state +{ + unsigned flatshade:1; +#if 0 + unsigned light_twoside:1; + unsigned front_winding:2; /**< PIPE_WINDING_x */ + unsigned cull_mode:2; /**< PIPE_WINDING_x */ + unsigned fill_cw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned fill_ccw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned offset_cw:1; + unsigned offset_ccw:1; +#endif + unsigned scissor:1; + unsigned poly_smooth:1; + unsigned poly_stipple_enable:1; + unsigned point_smooth:1; +#if 0 + unsigned point_sprite:1; + unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ +#endif + unsigned multisample:1; /* XXX maybe more ms state in future */ + unsigned line_smooth:1; + unsigned line_stipple_enable:1; + unsigned line_stipple_factor:8; /**< [1..256] actually */ + unsigned line_stipple_pattern:16; +#if 0 + unsigned bypass_clipping:1; +#endif + unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ + + float line_width; + float point_size; /**< used when no per-vertex size */ +#if 0 + float offset_units; + float offset_scale; + ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ +#endif +}; + + + +void * +cell_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *setup) +{ + struct pipe_rasterizer_state *state + = MALLOC(sizeof(struct pipe_rasterizer_state)); + memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); + return state; +} + + +void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +{ + struct cell_context *cell = cell_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(cell->draw, setup); + + cell->rasterizer = (struct pipe_rasterizer_state *)setup; + + cell->dirty |= CELL_NEW_RASTERIZER; +} + + +void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c new file mode 100644 index 0000000000..ade6cc8338 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +void * +cell_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + +void +cell_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + assert(unit < PIPE_MAX_SAMPLERS); + cell->sampler[unit] = (struct pipe_sampler_state *)sampler; + + cell->dirty |= CELL_NEW_SAMPLER; +} + + +void +cell_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + +void +cell_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, + struct pipe_texture *texture) +{ + struct cell_context *cell = cell_context(pipe); + + draw_flush(cell->draw); + + cell->texture[sampler] = texture; + + cell_update_texture_mapping(cell); + + cell->dirty |= CELL_NEW_TEXTURE; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_surface.c b/src/gallium/drivers/cell/ppu/cell_state_surface.c new file mode 100644 index 0000000000..287610b76b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_surface.c @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_state.h" + + +void +cell_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct cell_context *cell = cell_context(pipe); + + if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { + struct pipe_surface *csurf = fb->cbufs[0]; + struct pipe_surface *zsurf = fb->zsbuf; + uint i; + + /* unmap old surfaces */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { + pipe_surface_unmap(cell->framebuffer.cbufs[i]); + cell->cbuf_map[i] = NULL; + } + } + + if (cell->framebuffer.zsbuf && cell->zsbuf_map) { + pipe_surface_unmap(cell->framebuffer.zsbuf); + cell->zsbuf_map = NULL; + } + + /* update my state */ + cell->framebuffer = *fb; + + /* map new surfaces */ + if (csurf) + cell->cbuf_map[0] = pipe_surface_map(csurf); + + if (zsurf) + cell->zsbuf_map = pipe_surface_map(zsurf); + + cell->dirty |= CELL_NEW_FRAMEBUFFER; + } +} + diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c new file mode 100644 index 0000000000..0f01e920f9 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + + +#include "cell_context.h" +#include "cell_state.h" + +#include "pipe/draw/draw_context.h" + + +void +cell_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *attrib) +{ + struct cell_context *cell = cell_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + cell->vertex_element[index] = *attrib; /* struct copy */ + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_element(cell->draw, index, attrib); +} + + +void +cell_set_vertex_buffer(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer) +{ + struct cell_context *cell = cell_context(pipe); + assert(index < PIPE_ATTRIB_MAX); + cell->vertex_buffer[index] = *buffer; /* struct copy */ + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_buffer(cell->draw, index, buffer); +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c new file mode 100644 index 0000000000..fca93e4742 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/util/p_tile.h" +#include "cell_context.h" +#include "cell_surface.h" + + +/* Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +static void +cell_surface_data(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_pitch, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, width, height, src, src_pitch, srcx, srcy); + + pipe_surface_unmap(dst); +} + + +static void +cell_surface_copy(struct pipe_context *pipe, + unsigned do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + assert( dst->cpp == src->cpp ); + + pipe_copy_rect(pipe_surface_map(dst), + dst->cpp, + dst->pitch, + dstx, dsty, + width, height, + pipe_surface_map(src), + do_flip ? -src->pitch : src->pitch, + srcx, do_flip ? 1 - srcy - height : srcy); + + pipe_surface_unmap(src); + pipe_surface_unmap(dst); +} + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fill a rectangular sub-region. Need better logic about when to + * push buffers into AGP - will currently do so whenever possible. + */ +static void +cell_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + unsigned i, j; + void *dst_map = pipe_surface_map(dst); + + assert(dst->pitch > 0); + assert(width <= dst->pitch); + + switch (dst->cpp) { + case 1: + { + ubyte *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + memset(row, value, width); + row += dst->pitch; + } + } + break; + case 2: + { + ushort *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = (ushort) value; + row += dst->pitch; + } + } + break; + case 4: + { + unsigned *row = get_pointer(dst, dst_map, dstx, dsty); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) + row[j] = value; + row += dst->pitch; + } + } + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->pitch * 4; + } + } + break; + default: + assert(0); + break; + } + + pipe_surface_unmap( dst ); +} + + +void +cell_init_surface_functions(struct cell_context *cell) +{ + cell->pipe.surface_copy = cell_surface_copy; + cell->pipe.surface_fill = cell_surface_fill; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h new file mode 100644 index 0000000000..9e58f32944 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef CELL_SURFACE_H +#define CELL_SURFACE_H + + +struct cell_context; + + +extern void +cell_init_surface_functions(struct cell_context *cell); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c new file mode 100644 index 0000000000..c8ef36002f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -0,0 +1,252 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +cell_texture_layout(struct cell_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for ( level = 0 ; level <= pt->last_level ; level++ ) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) +{ + struct cell_texture *spt = CALLOC_STRUCT(cell_texture); + if (!spt) + return NULL; + + spt->base = *templat; + + cell_texture_layout(spt); + + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + return &spt->base; +} + + +void +cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct cell_texture *spt = cell_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(pipe->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +/** + * Called via pipe->get_tex_surface() + */ +struct pipe_surface * +cell_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct cell_texture *spt = cell_texture(pt); + struct pipe_surface *ps; + + ps = pipe->winsys->surface_alloc(pipe->winsys); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(pipe->winsys, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + + +static void +tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = h / tile_size, w_t = w / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* fill in tile (i, j) */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + for (i = 0; i < tile_size; i++) { + for (j = 0; j < tile_size; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + *tdst++ = src[srci * h + srcj]; + } + } + } + } +} + + + +/** + * Convert linear texture image data to tiled format for SPU usage. + */ +static void +cell_tile_texture(struct cell_context *cell, + struct cell_texture *texture) +{ + uint face = 0, level = 0, zslice = 0; + struct pipe_surface *surf; + const uint w = texture->base.width[0], h = texture->base.height[0]; + const uint *src; + + /* temporary restrictions: */ + assert(w >= TILE_SIZE); + assert(h >= TILE_SIZE); + assert(w % TILE_SIZE == 0); + assert(h % TILE_SIZE == 0); + + surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice); + ASSERT(surf); + + src = (const uint *) pipe_surface_map(surf); + + if (texture->tiled_data) { + align_free(texture->tiled_data); + } + texture->tiled_data = align_malloc(w * h * 4, 16); + + tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + + pipe_surface_unmap(surf); + + pipe_surface_reference(&surf, NULL); +} + + + +void +cell_update_texture_mapping(struct cell_context *cell) +{ + uint face = 0, level = 0, zslice = 0; + + if (cell->texture[0]) + cell_tile_texture(cell, cell->texture[0]); +#if 0 + if (cell->tex_surf && cell->tex_map) { + pipe_surface_unmap(cell->tex_surf); + cell->tex_map = NULL; + } + + /* XXX free old surface */ + + cell->tex_surf = cell_get_tex_surface(&cell->pipe, + &cell->texture[0]->base, + face, level, zslice); + + cell->tex_map = pipe_surface_map(cell->tex_surf); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h new file mode 100644 index 0000000000..0264fed88e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_TEXTURE_H +#define CELL_TEXTURE_H + + +struct pipe_context; +struct pipe_texture; + + +/** + * Subclass of pipe_texture + */ +struct cell_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; + + void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ +}; + + +/** cast wrapper */ +static INLINE struct cell_texture * +cell_texture(struct pipe_texture *pt) +{ + return (struct cell_texture *) pt; +} + + + +extern struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); + +extern void +cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); + +extern struct pipe_surface * +cell_get_tex_surface(struct pipe_context *pipe, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice); + + +extern void +cell_update_texture_mapping(struct cell_context *cell); + + +#endif /* CELL_TEXTURE */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c new file mode 100644 index 0000000000..e9fafe492e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include "cell_batch.h" +#include "cell_context.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_vbuf.h" +#include "pipe/draw/draw_vbuf.h" + + +/** Allow vertex data to be inlined after RENDER command */ +#define ALLOW_INLINE_VERTS 1 + + +/** + * Subclass of vbuf_render because we need a cell_context pointer in + * a few places. + */ +struct cell_vbuf_render +{ + struct vbuf_render base; + struct cell_context *cell; + uint prim; /**< PIPE_PRIM_x */ + uint vertex_size; /**< in bytes */ + void *vertex_buffer; /**< just for debug, really */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +/** cast wrapper */ +static struct cell_vbuf_render * +cell_vbuf_render(struct vbuf_render *vbr) +{ + return (struct cell_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +cell_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + return &cvbr->cell->vertex_info; +} + + +static void * +cell_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ + + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /* + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + */ + + /* Tell SPUs they can release the vert buf */ + if (cvbr->vertex_buf != ~0U) { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); + release->opcode = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(&cell->pipe, 0x0); + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + +static void +cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->prim = prim; + /*printf("cell_set_prim %u\n", prim);*/ +} + + +static void +cell_vbuf_draw(struct vbuf_render *vbr, + const ushort *indices, + uint nr_indices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + float xmin, ymin, xmax, ymax; + uint i; + uint nr_vertices = 0, min_index = ~0; + const void *vertices = cvbr->vertex_buffer; + const uint vertex_size = cvbr->vertex_size; + + for (i = 0; i < nr_indices; i++) { + if (indices[i] > nr_vertices) + nr_vertices = indices[i]; + if (indices[i] < min_index) + min_index = indices[i]; + } + nr_vertices++; + +#if 0 + /*if (min_index > 0)*/ + printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", + nr_indices, nr_vertices); + printf(" "); + for (i = 0; i < nr_indices; i += 3) { + printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); + } + printf("\n"); +#elif 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", + nr_indices, nr_vertices, + indices[0], indices[1], indices[2]); + printf("ind space = %u, vert space = %u, space = %u\n", + nr_indices * 2, + nr_vertices * 4 * cell->vertex_info.size, + cell_batch_free_space(cell)); +#endif + + /* compute x/y bounding box */ + xmin = ymin = 1e50; + xmax = ymax = -1e50; + for (i = min_index; i < nr_vertices; i++) { + const float *v = (float *) ((ubyte *) vertices + i * vertex_size); + if (v[0] < xmin) + xmin = v[0]; + if (v[0] > xmax) + xmax = v[0]; + if (v[1] < ymin) + ymin = v[1]; + if (v[1] > ymax) + ymax = v[1]; + } +#if 0 + printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); + fflush(stdout); +#endif + + if (cvbr->prim != PIPE_PRIM_TRIANGLES) + return; /* only render tris for now */ + + /* build/insert batch RENDER command */ + { + const uint index_bytes = ROUNDUP8(nr_indices * 2); + const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; + + struct cell_command_render *render + = (struct cell_command_render *) + cell_batch_alloc(cell, batch_size); + + render->opcode = CELL_CMD_RENDER; + render->prim_type = cvbr->prim; + + render->num_indexes = nr_indices; + render->min_index = min_index; + + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. + */ + render->vertex_size = 4 * cell->vertex_info.size; + render->num_verts = nr_vertices; + if (ALLOW_INLINE_VERTS && + min_index == 0 && + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); + memcpy(dst, vertices, vertex_bytes); + render->inline_verts = TRUE; + render->vertex_buf = ~0; + } + else { + /* vertex data in separate buffer */ + render->inline_verts = FALSE; + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; + } + + render->xmin = xmin; + render->ymin = ymin; + render->xmax = xmax; + render->ymax = ymax; + } + +#if 0 + /* helpful for debug */ + cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); +#endif +} + + +static void +cell_vbuf_destroy(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->cell->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +cell_init_vbuf(struct cell_context *cell) +{ + assert(cell->draw); + + cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); + + /* The max number of indexes is what can fix into a batch buffer, + * minus the render and release-verts commands. + */ + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; + + cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; + cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; + cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; + cell->vbuf_render->base.draw = cell_vbuf_draw; + cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; + cell->vbuf_render->base.destroy = cell_vbuf_destroy; + + cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif + + cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); +} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h new file mode 100644 index 0000000000..d265cbf770 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_VBUF_H +#define CELL_VBUF_H + + +struct cell_context; + +extern void +cell_init_vbuf(struct cell_context *cell); + + +#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c new file mode 100644 index 0000000000..80dd500b34 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,120 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. + * + * \author Ian Romanick + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. + */ +void +cell_vertex_shader_queue_flush(struct draw_context *draw) +{ + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct cell_command_vs *const vs = &cell_global.command[0].vs; + uint64_t *batch; + struct cell_array_info *array_info; + unsigned i, j; + + assert(draw->vs.queue_nr != 0); + + /* XXX: do this on statechange: + */ + draw_update_vertex_fetch(draw); + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + + batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; + + array_info = (struct cell_array_info *) &batch[1]; + assert(draw->vertex_fetch.src_ptr[i] != NULL); + array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; + array_info->attr = i; + array_info->pitch = draw->vertex_fetch.pitch[i]; + array_info->format = draw->vertex_element[i].src_format; + } + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + + sizeof(struct pipe_viewport_state)); + batch[0] = CELL_CMD_STATE_VIEWPORT; + (void) memcpy(&batch[1], &draw->viewport, + sizeof(struct pipe_viewport_state)); + + cell_batch_flush(cell); + + vs->opcode = CELL_CMD_VS_EXECUTE; + vs->shader.num_outputs = draw->num_vs_outputs; + vs->shader.declarations = (uintptr_t) draw->machine.Declarations; + vs->shader.num_declarations = draw->machine.NumDeclarations; + vs->shader.instructions = (uintptr_t) draw->machine.Instructions; + vs->shader.num_instructions = draw->machine.NumInstructions; + vs->shader.uniforms = (uintptr_t) draw->user.constants; + vs->shader.immediates = (uintptr_t) draw->machine.Imms; + vs->shader.num_immediates = draw->machine.ImmLimit / 4; + vs->nr_attrs = draw->vertex_fetch.nr_attrs; + + (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); + vs->nr_planes = draw->nr_planes; + + for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { + const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + vs->elts[j] = draw->vs.queue[i + j].elt; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + } + + for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { + vs->elts[j] = vs->elts[0]; + vs->vOut[j] = vs->vOut[0]; + } + + vs->num_elts = n; + send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); + + cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); + } + + draw->vs.queue_nr = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c new file mode 100644 index 0000000000..ebabce3c8f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "cell_winsys.h" + + +struct cell_winsys * +cell_get_winsys(uint format) +{ + struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys); + if (cws) + cws->preferredFormat = format; + return cws; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h new file mode 100644 index 0000000000..ae2af5696b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_WINSYS_H +#define CELL_WINSYS_H + +#include "pipe/p_compiler.h" + + +/** + * Very simple winsys at this time. + * Will probably eventually add SPU control info. + */ +struct cell_winsys +{ + uint preferredFormat; +}; + + +extern struct cell_winsys * +cell_get_winsys(uint format); + + + +#endif diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile new file mode 100644 index 0000000000..f202971d73 --- /dev/null +++ b/src/gallium/drivers/cell/spu/Makefile @@ -0,0 +1,72 @@ +# Gallium3D Cell driver: SPU code + +# This makefile builds the g3d_spu.a file that's linked into the +# PPU code/library. + + +TOP = ../../../../.. +include $(TOP)/configs/linux-cell + + +PROG = g3d + +PROG_SPU = $(PROG)_spu +PROG_SPU_A = $(PROG)_spu.a +PROG_SPU_EMBED_O = $(PROG)_spu-embed.o + + +SOURCES = \ + spu_main.c \ + spu_blend.c \ + spu_render.c \ + spu_texture.c \ + spu_tile.c \ + spu_tri.c \ + spu_exec.c \ + spu_util.c \ + spu_vertex_fetch.c \ + spu_vertex_shader.c + +SPU_OBJECTS = $(SOURCES:.c=.o) \ + +SPU_ASM_OUT = $(SOURCES:.c=.s) \ + +INCLUDE_DIRS = -I$(TOP)/src/mesa + + +.c.o: + $(SPU_CC) $(SPU_CFLAGS) -c $< + +.c.s: + $(SPU_CC) $(SPU_CFLAGS) -S $< + + +# The .a file will be linked into the main/PPU executable +default: $(PROG_SPU_A) + +$(PROG_SPU_A): $(PROG_SPU_EMBED_O) + $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) + +$(PROG_SPU_EMBED_O): $(PROG_SPU) + $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) + +$(PROG_SPU): $(SPU_OBJECTS) + $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) + + + +asmfiles: $(SPU_ASM_OUT) + + +clean: + rm -f *~ *.o *.a *.d *.s $(PROG_SPU) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + diff --git a/src/gallium/drivers/cell/spu/spu_blend.c b/src/gallium/drivers/cell/spu/spu_blend.c new file mode 100644 index 0000000000..23ec0eeb45 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_blend.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "spu_main.h" +#include "spu_blend.h" +#include "spu_colorpack.h" + + +void +blend_quad(uint itx, uint ity, vector float colors[4]) +{ + /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */ + vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]); + vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]); + vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]); + vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]); + + vector float alpha00 = spu_splats(spu_extract(colors[0], 3)); + vector float alpha01 = spu_splats(spu_extract(colors[1], 3)); + vector float alpha10 = spu_splats(spu_extract(colors[2], 3)); + vector float alpha11 = spu_splats(spu_extract(colors[3], 3)); + + vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00); + vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01); + vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10); + vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11); + + colors[0] = spu_add(spu_mul(colors[0], alpha00), + spu_mul(fbc00, one_minus_alpha00)); + colors[1] = spu_add(spu_mul(colors[1], alpha01), + spu_mul(fbc01, one_minus_alpha01)); + colors[2] = spu_add(spu_mul(colors[2], alpha10), + spu_mul(fbc10, one_minus_alpha10)); + colors[3] = spu_add(spu_mul(colors[3], alpha11), + spu_mul(fbc11, one_minus_alpha11)); +} + diff --git a/src/gallium/drivers/cell/spu/spu_blend.h b/src/gallium/drivers/cell/spu/spu_blend.h new file mode 100644 index 0000000000..2b594b578b --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_blend.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_BLEND_H +#define SPU_BLEND_H + + +extern void +blend_quad(uint itx, uint ity, vector float colors[4]); + + +#endif /* SPU_BLEND_H */ diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h new file mode 100644 index 0000000000..e9fee8a3a6 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + + out = spu_shuffle(out, out, ((vector unsigned char) { + 0, 4, 8, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }) ); + + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, shuffle); + return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_color(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 0, 0, 0, 0, + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15}) ); + return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15, + 0, 0, 0, 0}) ); + + return spu_convtf(color_u4, 32); +} + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c new file mode 100644 index 0000000000..e51008b9b3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -0,0 +1,1948 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +#include "pipe/tgsi/util/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor) +{ + qword zero; + qword not_zero; + uint i; + + mach->Samplers = samplers; + mach->Processor = processor; + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + zero = si_xor(zero, zero); + not_zero = si_xori(zero, 0xff); + + /* Setup constants. */ + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ + return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ + return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ + return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +}; + +static qword +micro_ddx(qword src) +{ + qword bottom_right = si_shufb(src, src, br_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ + qword top_left = si_shufb(src, src, tl_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ + return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ + return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ + return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ + return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ + return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ + const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + + return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ + return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ + return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ + return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ + return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) +{ + return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ + const qword half = (qword) spu_splats(0.5f); + + /* May be able to use _roundf4. There may be some difference, though. + */ + return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ + return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ + return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ + return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ + return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( + const struct spu_exec_machine *mach, + const uint file, + const uint swizzle, + const union spu_exec_channel *index, + union spu_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: { + unsigned char buffer[32] ALIGN16_ATTRIB; + unsigned i; + + for (i = 0; i < 4; i++) { + const float *ptr = mach->Consts[index->i[i]]; + const uint64_t addr = (uint64_t)(uintptr_t) ptr; + const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + + mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) + + (sizeof(float) * swizzle)], sizeof(float)); + } + break; + } + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + assert( index->i[1] < (int) mach->ImmLimit ); + assert( index->i[2] < (int) mach->ImmLimit ); + assert( index->i[3] < (int) mach->ImmLimit ); + + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct spu_exec_machine *mach, + union spu_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union spu_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + CHAN_X); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.q = si_mpyi(index.q, 17); + break; + case TGSI_FILE_CONSTANT: + index.q = si_shli(index.q, 12); + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + chan->q = micro_abs(chan->q); + break; + + case TGSI_UTIL_SIGN_SET: + chan->q = micro_set_sign(chan->q); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + chan->q = micro_neg(chan->q); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); + } +} + +static void +store_dest( + struct spu_exec_machine *mach, + const union spu_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union spu_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union spu_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct spu_sampler *sampler, + const union spu_exec_channel *s, + const union spu_exec_channel *t, + const union spu_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union spu_exec_channel *r, + union spu_exec_channel *g, + union spu_exec_channel *b, + union spu_exec_channel *a ) +{ + qword rgba[4]; + qword out[4]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + + _transpose_matrix4x4(out, rgba); + r->q = out[0]; + g->q = out[1]; + b->q = out[2]; + a->q = out[3]; +} + + +static void +exec_tex(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union spu_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[1], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[1].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + + +static void +constant_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + interpolation_func interp; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union spu_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_cflts(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + + FETCH( &r[2], 0, CHAN_W ); + r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); + r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); + r[1].q = micro_pow(r[1].q, r[2].q); + + /* r0 = (r0 > 0.0) ? r1 : 0.0 + */ + r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, + r[0].q); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sqrt(r[0].q); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert (0); + break; + + case TGSI_OPCODE_LOG: + assert (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fm(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fa(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + r[0].q = si_fm(r[0].q, r[1].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_min(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_max(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = micro_ge(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ge(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + FETCH( &r[2], 2, chan_index ); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fs(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + r[1].q = si_fs(r[1].q, r[2].q); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_frc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_flr(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_rnd(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_lg2(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = micro_pow(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + /* r2 = (r0 * r1) - (r3 * r5) + */ + r[2].q = si_fm(r[3].q, r[5].q); + r[2].q = si_fms(r[0].q, r[1].q, r[2].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + FETCH(&r[5], 0, CHAN_X); + + /* r3 = (r3 * r2) - (r1 * r5) + */ + r[1].q = si_fm(r[1].q, r[5].q); + r[3].q = si_fms(r[3].q, r[2].q, r[1].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + /* r5 = (r5 * r4) - (r0 * r2) + */ + r[0].q = si_fm(r[0].q, r[2].q); + r[5].q = si_fms(r[5].q, r[4].q, r[0].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + r[0].q = micro_abs(r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 1, CHAN_W); + + r[0].q = si_fa(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_cos(r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddx(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddy(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fcgt(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sin(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fcgt(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + /* r0 = (r0 < 0.0) ? r1 : r2 + */ + r[3].q = si_xor(r[3].q, r[3].q); + r[0].q = micro_lt(r[0].q, r[3].q); + r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + r[1].q = micro_cos(r[0].q); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + r[1].q = micro_sin(r[0].q); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ceil(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_csflt(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_xorbi(r[0].q, 0xff); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_trunc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_shl(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ishr(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_and(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_or(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_xor(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + for (i = 0; i < mach->NumDeclarations; i++) { + uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; + struct tgsi_full_declaration decl; + unsigned long decl_addr = (unsigned long) (mach->Declarations+i); + unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); + + mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); + exec_declaration( mach, &decl ); + } + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; + struct tgsi_full_instruction inst; + unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); + unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); + + assert(pc < mach->NumInstructions); + mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); + exec_instruction( mach, & inst, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h new file mode 100644 index 0000000000..b4c7661ef6 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "pipe/tgsi/exec/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union spu_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; + qword q; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct spu_exec_vector +{ + union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct spu_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct spu_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + */ + struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_ADDRS + 1] + ALIGN16_ATTRIB; + + struct spu_exec_vector *Addrs; + + struct spu_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct spu_exec_vector *Inputs; + struct spu_exec_vector *Outputs; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct spu_interp_coef *InterpCoefs; + struct spu_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c new file mode 100644 index 0000000000..e375197fe6 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -0,0 +1,567 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* main() for Cell SPU code */ + + +#include +#include + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_texture.h" +#include "spu_tile.h" +//#include "spu_test.h" +#include "spu_vertex_shader.h" +#include "pipe/cell/common.h" +#include "pipe/p_defines.h" + + +/* +helpful headers: +/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h +/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h +*/ + +boolean Debug = FALSE; + +struct spu_global spu; + +struct spu_vs_context draw; + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +/** + * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled + * tiles back to the main framebuffer. + */ +static void +really_clear_tiles(uint surfaceIndex) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (surfaceIndex == 0) { + clear_c_tile(&spu.ctile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + } + } + } + else { + clear_z_tile(&spu.ztile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); + } + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (Debug) + printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, + clear->surface, clear->value); + +#define CLEAR_OPT 1 +#if CLEAR_OPT + /* set all tile's status to CLEAR */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + spu.fb.color_clear_value = clear->value; + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + spu.fb.depth_clear_value = clear->value; + } + return; +#endif + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + clear_c_tile(&spu.ctile); + } + else { + spu.fb.depth_clear_value = clear->value; + clear_z_tile(&spu.ztile); + } + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + /* XXX we don't want this here, but it fixes bad tile results */ + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif + + if (Debug) + printf("SPU %u: CLEAR SURF done\n", spu.init.id); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + if (Debug) + printf("SPU %u: RELEASE VERTS %u\n", + spu.init.id, release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + if (Debug) + printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + spu.init.id, + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM) + spu.fb.zsize = 4; + else if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) + spu.fb.zsize = 2; + else + spu.fb.zsize = 0; + + if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else + ASSERT(0); +} + + +static void +cmd_state_blend(const struct pipe_blend_state *state) +{ + if (Debug) + printf("SPU %u: BLEND: enabled %d\n", + spu.init.id, + state->blend_enable); + + memcpy(&spu.blend, state, sizeof(*state)); +} + + +static void +cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) +{ + if (Debug) + printf("SPU %u: DEPTH_STENCIL: ztest %d\n", + spu.init.id, + state->depth.enabled); + + memcpy(&spu.depth_stencil, state, sizeof(*state)); +} + + +static void +cmd_state_sampler(const struct pipe_sampler_state *state) +{ + if (Debug) + printf("SPU %u: SAMPLER\n", + spu.init.id); + + memcpy(&spu.sampler[0], state, sizeof(*state)); + if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture = sample_texture_bilinear; + else + spu.sample_texture = sample_texture_nearest; +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + if (Debug) + printf("SPU %u: TEXTURE at %p size %u x %u\n", + spu.init.id, texture->start, texture->width, texture->height); + + memcpy(&spu.texture, texture, sizeof(*texture)); + spu.tex_size = (vector float) + { spu.texture.width, spu.texture.height, 0.0, 0.0}; + spu.tex_size_mask = (vector unsigned int) + { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + if (Debug) { + printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, + vinfo->num_attribs); + } + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_ATTRIB_MAX); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_finish(void) +{ + if (Debug) + printf("SPU %u: FINISH\n", spu.init.id); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); + uint pos; + + if (Debug) + printf("SPU %u: BATCH buffer %u, len %u, from %p\n", + spu.init.id, buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + if (Debug) + printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + release_buffer(buf); + + for (pos = 0; pos < usize; /* no incr */) { + switch (buffer[pos]) { + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 8; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += pos_incr; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_STATE_BLEND: + cmd_state_blend((struct pipe_blend_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); + break; + case CELL_CMD_STATE_DEPTH_STENCIL: + cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); + break; + case CELL_CMD_STATE_SAMPLER: + cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); + break; + case CELL_CMD_STATE_TEXTURE: + cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); + break; + default: + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); + ASSERT(0); + break; + } + } + + if (Debug) + printf("SPU %u: BATCH complete\n", spu.init.id); +} + + +/** + * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. + */ +static void +main_loop(void) +{ + struct cell_command cmd; + int exitFlag = 0; + + if (Debug) + printf("SPU %u: Enter main loop\n", spu.init.id); + + ASSERT((sizeof(struct cell_command) & 0xf) == 0); + ASSERT_ALIGN16(&cmd); + + while (!exitFlag) { + unsigned opcode; + int tag = 0; + + if (Debug) + printf("SPU %u: Wait for cmd...\n", spu.init.id); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + + if (Debug) + printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); + + /* command payload */ + mfc_get(&cmd, /* dest */ + (unsigned int) spu.init.cmd, /* src */ + sizeof(struct cell_command), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + /* + * NOTE: most commands should be contained in a batch buffer + */ + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + if (Debug) + printf("SPU %u: EXIT\n", spu.init.id); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: + spu_execute_vertex_shader(&draw, &cmd.vs); + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode!\n"); + } + + } + + if (Debug) + printf("SPU %u: Exit main loop\n", spu.init.id); +} + + + +static void +one_time_init(void) +{ + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); +} + + + +/* In some versions of the SDK the SPE main takes 'unsigned long' as a + * parameter. In others it takes 'unsigned long long'. Use a define to + * select between the two. + */ +#ifdef SPU_MAIN_PARAM_LONG_LONG +typedef unsigned long long main_param_t; +#else +typedef unsigned long main_param_t; +#endif + +/** + * SPE entrypoint. + */ +int +main(main_param_t speid, main_param_t argp) +{ + int tag = 0; + + (void) speid; + + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + + one_time_init(); + + if (Debug) + printf("SPU: main() speid=%lu\n", speid); + + mfc_get(&spu.init, /* dest */ + (unsigned int) argp, /* src */ + sizeof(struct cell_init_info), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + +#if 0 + if (spu.init.id==0) + spu_test_misc(); +#endif + + main_loop(); + + return 0; +} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h new file mode 100644 index 0000000000..1710a17512 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_MAIN_H +#define SPU_MAIN_H + + +#include + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_vertex.h" +#include "pipe/p_state.h" + + + +#define MAX_WIDTH 1024 +#define MAX_HEIGHT 1024 + + +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + +struct spu_framebuffer { + void *color_start; /**< addr of color surface in main memory */ + void *depth_start; /**< addr of depth surface in main memory */ + enum pipe_format color_format; + enum pipe_format depth_format; + uint width, height; /**< size in pixels */ + uint width_tiles, height_tiles; /**< width and height in tiles */ + + uint color_clear_value; + uint depth_clear_value; + + uint zsize; /**< 0, 2 or 4 bytes per Z */ +} ALIGN16_ATTRIB; + + +/** + * All SPU global/context state will be in singleton object of this type: + */ +struct spu_global +{ + struct cell_init_info init; + + struct spu_framebuffer fb; + struct pipe_blend_state blend_stencil; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_blend_state blend; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct cell_command_texture texture; + + struct vertex_info vertex_info; + + /* XXX more state to come */ + + + /** current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + + + /** for converting RGBA to PIPE_FORMAT_x colors */ + vector unsigned char color_shuffle; + + vector float tex_size; + vector unsigned int tex_size_mask; /**< == int(size - 1) */ + + vector float (*sample_texture)(vector float texcoord); + +} ALIGN16_ATTRIB; + + +extern struct spu_global spu; +extern boolean Debug; + + + + +/* DMA TAGS */ + +#define TAG_SURFACE_CLEAR 10 +#define TAG_VERTEX_BUFFER 11 +#define TAG_READ_TILE_COLOR 12 +#define TAG_READ_TILE_Z 13 +#define TAG_WRITE_TILE_COLOR 14 +#define TAG_WRITE_TILE_Z 15 +#define TAG_INDEX_BUFFER 16 +#define TAG_BATCH_BUFFER 17 +#define TAG_MISC 18 +#define TAG_TEXTURE_TILE 19 +#define TAG_INSTRUCTION_FETCH 20 + + + +static INLINE void +wait_on_mask(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_any(); +} + + +static INLINE void +wait_on_mask_all(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_all(); +} + + + + + +static INLINE void +memset16(ushort *d, ushort value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +static INLINE void +memset32(uint *d, uint value, uint count) +{ + uint i; + for (i = 0; i < count; i++) + d[i] = value; +} + + +#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 0000000000..932fb500b3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,301 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include +#include +#include + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "pipe/cell/common.h" + + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. %u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. + */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + + + if (Debug) { + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " + "inline_vert=%u\n", + spu.init.id, + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + /* + printf(" bound: %g, %g .. %g, %g\n", + render->xmin, render->ymin, render->xmax, render->ymax); + */ + } + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... */ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + if (Debug) + printf("SPU %u: RENDER done\n", + spu.init.id); +} + + diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h new file mode 100644 index 0000000000..fbcdc5ec31 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "pipe/cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c new file mode 100644 index 0000000000..3962aaa4a9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" + + +/** + * Number of texture tiles to cache. + * Note that this will probably be the largest consumer of SPU local store/ + * memory for this driver! + */ +#define CACHE_SIZE 16 + +static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; + +static vector unsigned int tex_tile_xy[CACHE_SIZE]; + + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ + /* XXX memset? */ + uint i; + for (i = 0; i < CACHE_SIZE; i++) { + tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); + } +} + + +/** + * Return the cache pos/index which corresponds to tile (tx,ty) + */ +static INLINE uint +cache_pos(vector unsigned int txty) +{ + uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; + return pos; +} + + +/** + * Make sure the tile for texel (i,j) is present, return its position/index + * in the cache. + */ +static uint +get_tex_tile(vector unsigned int ij) +{ + /* tile address: tx,ty */ + const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ + const uint pos = cache_pos(txty); + + if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || + (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { + + /* texture cache miss, fetch tile from main memory */ + const uint tiles_per_row = spu.texture.width / TILE_SIZE; + const uint bytes_per_tile = sizeof(tile_t); + const void *src = (const ubyte *) spu.texture.start + + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; + + printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", + spu.init.id, + spu_extract(txty,0), + spu_extract(txty,1), + pos, + spu_extract(tex_tile_xy[pos],0), + spu_extract(tex_tile_xy[pos],1)); + + ASSERT_ALIGN16(tex_tiles[pos].ui); + ASSERT_ALIGN16(src); + + mfc_get(tex_tiles[pos].ui, /* dest */ + (unsigned int) src, + bytes_per_tile, /* size */ + TAG_TEXTURE_TILE, + 0, /* tid */ + 0 /* rid */); + + wait_on_mask(1 << TAG_TEXTURE_TILE); + + tex_tile_xy[pos] = txty; + } + else { +#if 0 + printf("SPU %u: tex cache HIT at %d, %d\n", + spu.init.id, tx, ty); +#endif + } + + return pos; +} + + +/** + * Get texture sample at texcoord. + * XXX this is extremely primitive for now. + */ +vector float +sample_texture_nearest(vector float texcoord) +{ + vector float tc = spu_mul(texcoord, spu.tex_size); + vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ + itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ + vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ + uint pos = get_tex_tile(itc); + uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; + return spu_unpack_A8R8G8B8(texel); +} + + +vector float +sample_texture_bilinear(vector float texcoord) +{ + static const vector unsigned int offset10 = {1, 0, 0, 0}; + static const vector unsigned int offset01 = {0, 1, 0, 0}; + + vector float tc = spu_mul(texcoord, spu.tex_size); + tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ + + /* integer texcoords S,T: */ + vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ + vector unsigned int itc01 = spu_add(itc00, offset01); + vector unsigned int itc10 = spu_add(itc00, offset10); + vector unsigned int itc11 = spu_add(itc10, offset01); + + /* mask (GL_REPEAT) */ + itc00 = spu_and(itc00, spu.tex_size_mask); + itc01 = spu_and(itc01, spu.tex_size_mask); + itc10 = spu_and(itc10, spu.tex_size_mask); + itc11 = spu_and(itc11, spu.tex_size_mask); + + /* intra tile addr */ + vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); + vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); + vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); + vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); + + /* get tile cache positions */ + uint pos00 = get_tex_tile(itc00); + uint pos01, pos10, pos11; + if ((spu_extract(ij00, 0) < TILE_SIZE-1) && + (spu_extract(ij00, 1) < TILE_SIZE-1)) { + /* all texels are in the same tile */ + pos01 = pos10 = pos11 = pos00; + } + else { + pos01 = get_tex_tile(itc01); + pos10 = get_tex_tile(itc10); + pos11 = get_tex_tile(itc11); + } + + /* get texels from tiles and convert to float[4] */ + vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); + vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); + vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); + vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. + */ + vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); + vector signed int itc1024 = spu_convts(tc1024, 0); + itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); + vector float weight = spu_convtf(itc1024, 10); + + /* smeared frac and 1-frac */ + vector float sfrac = spu_splats(spu_extract(weight, 0)); + vector float tfrac = spu_splats(spu_extract(weight, 1)); + vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); + vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); + + /* multiply the samples (colors) by the S/T weights */ + texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); + texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); + texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); + texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); + + /* compute sum of weighted samples */ + vector float texel_sum = spu_add(texel00, texel01); + texel_sum = spu_add(texel_sum, texel10); + texel_sum = spu_add(texel_sum, texel11); + + return texel_sum; +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h new file mode 100644 index 0000000000..95eb87080f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TEXTURE_H +#define SPU_TEXTURE_H + + +#include "pipe/p_compiler.h" + + +extern void +invalidate_tex_cache(void); + + +extern vector float +sample_texture_nearest(vector float texcoord); + + +extern vector float +sample_texture_bilinear(vector float texcoord); + + +#endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c new file mode 100644 index 0000000000..12dc246328 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "spu_tile.h" +#include "spu_main.h" + + +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + src += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("get_tile: dest: %p src: 0x%x size: %d\n", + tile, (unsigned int) src, bytesPerTile); + */ + mfc_get(tile->ui, /* dest in local memory */ + (unsigned int) src, /* src in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + + +void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) +{ + const uint offset = ty * spu.fb.width_tiles + tx; + const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); + ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; + + dst += offset * bytesPerTile; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + ASSERT_ALIGN16(tile); + /* + printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", + spu.init.id, + tile, (unsigned int) dst, bytesPerTile); + */ + mfc_put((void *) tile->ui, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + bytesPerTile, + tag, + 0, /* tid */ + 0 /* rid */); +} + diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h new file mode 100644 index 0000000000..e53340a55a --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TILE_H +#define SPU_TILE_H + + +#include +#include +#include "spu_main.h" +#include "pipe/cell/common.h" + + + +void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); + +void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); + + + +static INLINE void +clear_c_tile(tile_t *ctile) +{ + memset32((uint*) ctile->ui, + spu.fb.color_clear_value, + TILE_SIZE * TILE_SIZE); +} + + +static INLINE void +clear_z_tile(tile_t *ztile) +{ + if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + memset16((ushort*) ztile->us, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } + else { + ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); + memset32((uint*) ztile->ui, + spu.fb.depth_clear_value, + TILE_SIZE * TILE_SIZE); + } +} + + +#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c new file mode 100644 index 0000000000..be9624cf7d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -0,0 +1,926 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Triangle rendering within a tile. + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_util.h" +#include "spu_blend.h" +#include "spu_colorpack.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_tri.h" + +#include "spu_ztest.h" + + +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ +typedef vector unsigned int mask_t; + +typedef union +{ + vector float v; + float f[4]; +} float4; + + +/** + * Simplified types taken from other parts of Gallium + */ +struct vertex_header { + vector float data[1]; +}; + + + +/* XXX fix this */ +#undef CEILF +#define CEILF(X) ((float) (int) ((X) + 0.99999)) + + +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +struct interp_coef +{ + float4 a0; + float4 dadx; + float4 dady; +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const struct vertex_header *vmax; + const struct vertex_header *vmid; + const struct vertex_header *vmin; + const struct vertex_header *vprovoke; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + uint tx, ty; + + int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; + +#if 0 + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +#else + struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +#endif + +#if 0 + struct quad_header quad; +#endif + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; +}; + + + +static struct setup_stage setup; + + + + +#if 0 +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} +#endif + +#if 0 +/** + * Clip setup.quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_stage *setup) +{ + const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup.quad.x0 >= maxx || + setup.quad.y0 >= maxy || + setup.quad.x0 + 1 < minx || + setup.quad.y0 + 1 < miny) { + /* totally clipped */ + setup.quad.mask = 0x0; + return; + } + if (setup.quad.x0 < minx) + setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup.quad.y0 < miny) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup.quad.x0 == maxx - 1) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup.quad.y0 == maxy - 1) + setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} +#endif + +#if 0 +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_stage *setup) +{ + quad_clip(setup); + if (setup.quad.mask) { + struct softpipe_context *sp = setup.softpipe; + sp->quad.first->run(sp->quad.first, &setup.quad); + } +} +#endif + +/** + * Evaluate attribute coefficients (plane equations) to compute + * attribute values for the four fragments in a quad. + * Eg: four colors will be compute. + */ +static INLINE void +eval_coeff(uint slot, float x, float y, vector float result[4]) +{ + switch (spu.vertex_info.interp_mode[slot]) { + case INTERP_CONSTANT: + result[QUAD_TOP_LEFT] = + result[QUAD_TOP_RIGHT] = + result[QUAD_BOTTOM_LEFT] = + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; + break; + + case INTERP_LINEAR: + /* fall-through, for now */ + default: + { + register vector float dadx = setup.coef[slot].dadx.v; + register vector float dady = setup.coef[slot].dady.v; + register vector float topLeft + = spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + result[QUAD_TOP_LEFT] = topLeft; + result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); + result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); + result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); + } + } +} + + +static INLINE vector float +eval_z(float x, float y) +{ + const uint slot = 0; + const float dzdx = setup.coef[slot].dadx.f[2]; + const float dzdy = setup.coef[slot].dady.f[2]; + const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; + return spu_add(topLeftv, derivs); +} + + +static INLINE mask_t +do_depth_test(int x, int y, mask_t quadmask) +{ + float4 zvals; + mask_t mask; + + zvals.v = eval_z((float) x, (float) y); + + if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { + int ix = (x - setup.cliprect_minx) / 4; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); + } + else { + int ix = (x - setup.cliprect_minx) / 2; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); + } + + if (spu_extract(spu_orx(mask), 0)) + spu.cur_ztile_status = TILE_STATUS_DIRTY; + + return mask; +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + * Note: about 1/5 to 1/7 of the time, mask is zero and this function + * should be skipped. But adding the test for that slows things down + * overall. + */ +static INLINE void +emit_quad( int x, int y, mask_t mask ) +{ +#if 0 + struct softpipe_context *sp = setup.softpipe; + setup.quad.x0 = x; + setup.quad.y0 = y; + setup.quad.mask = mask; + sp->quad.first->run(sp->quad.first, &setup.quad); +#else + + if (spu.depth_stencil.depth.enabled) { + mask = do_depth_test(x, y, mask); + } + + /* If any bits in mask are set... */ + if (spu_extract(spu_orx(mask), 0)) { + const int ix = x - setup.cliprect_minx; + const int iy = y - setup.cliprect_miny; + const vector unsigned char shuffle = spu.color_shuffle; + vector float colors[4]; + + spu.cur_ctile_status = TILE_STATUS_DIRTY; + + if (spu.texture.start) { + /* texture mapping */ + vector float texcoords[4]; + eval_coeff(2, (float) x, (float) y, texcoords); + + if (spu_extract(mask, 0)) + colors[0] = spu.sample_texture(texcoords[0]); + if (spu_extract(mask, 1)) + colors[1] = spu.sample_texture(texcoords[1]); + if (spu_extract(mask, 2)) + colors[2] = spu.sample_texture(texcoords[2]); + if (spu_extract(mask, 3)) + colors[3] = spu.sample_texture(texcoords[3]); + } + else { + /* simple shading */ + eval_coeff(1, (float) x, (float) y, colors); + } + +#if 1 + if (spu.blend.blend_enable) + blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); +#endif + + if (spu_extract(mask, 0)) + spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); + if (spu_extract(mask, 1)) + spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); + if (spu_extract(mask, 2)) + spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); + if (spu_extract(mask, 3)) + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); + +#if 0 + /* SIMD_Z with swizzled color buffer (someday) */ + vector unsigned int uicolors = *((vector unsigned int *) &colors); + spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); +#endif + } + +#endif +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * The mask is a uint4 vector and each element will be 0 or 0xffffffff. + */ +static INLINE mask_t calculate_mask( int x ) +{ + /* This is a little tricky. + * Use & instead of && to avoid branches. + * Use negation to convert true/false to ~0/0 values. + */ + mask_t mask; + mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); + mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); + mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); + mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( void ) +{ + int minleft, maxright; + int x; + + switch (setup.span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup.span.left[0], setup.span.left[1]); + maxright = MAX2(setup.span.right[0], setup.span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup.span.left[0]; + maxright = setup.span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup.span.left[1]; + maxright = setup.span.right[1]; + break; + + default: + return; + } + + + /* OK, we're very likely to need the tile data now. + * clear or finish waiting if needed. + */ + if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ctile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_COLOR); + spu.cur_ctile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_c_tile(&spu.ctile); + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); + + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ztile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_Z); + spu.cur_ztile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_z_tile(&spu.ztile); + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { +#if 1 + emit_quad( x, setup.span.y, calculate_mask( x ) ); +#endif + } + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct vertex_header *v) +{ + int i; + fprintf(stderr, "Vertex: (%p)\n", v); + for (i = 0; i < setup.quad.nr_attrs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + } +} +#endif + + +static boolean setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) +{ + +#if DEBUG_VERTS + fprintf(stderr, "Triangle:\n"); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); +#endif + + setup.vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = spu_extract(v0->data[0], 1); + float y1 = spu_extract(v1->data[0], 1); + float y2 = spu_extract(v2->data[0], 1); + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup.vmin = v0; + setup.vmid = v1; + setup.vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup.vmin = v2; + setup.vmid = v0; + setup.vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup.vmin = v0; + setup.vmid = v2; + setup.vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup.vmin = v1; + setup.vmid = v0; + setup.vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup.vmin = v2; + setup.vmid = v1; + setup.vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup.vmin = v1; + setup.vmid = v2; + setup.vmax = v0; + } + } + } + + /* Check if triangle is completely outside the tile bounds */ + if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) + return FALSE; + if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) + return FALSE; + + setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); + setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup.emaj.dx * setup.ebot.dy - + setup.ebot.dx * setup.emaj.dy); + + setup.oneoverarea = 1.0f / area; + /* + _mesa_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup.oneoverarea, area, prim->det ); + */ + } + +#if 0 + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); +#endif + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot]. + * The result will be put into setup.coef[slot].a0. + * \param slot which attribute slot + */ +static INLINE void +const_coeff(uint slot) +{ + setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0.v = setup.vprovoke->data[slot]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static INLINE void +tri_linear_coeff(uint slot, uint firstComp, uint lastComp) +{ + uint i; + const float *vmin_d = (float *) &setup.vmin->data[slot]; + const float *vmid_d = (float *) &setup.vmid->data[slot]; + const float *vmax_d = (float *) &setup.vmax->data[slot]; + const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; + const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + + for (i = firstComp; i < lastComp; i++) { + float botda = vmid_d[i] - vmin_d[i]; + float majda = vmax_d[i] - vmin_d[i]; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; + + ASSERT(slot < PIPE_MAX_SHADER_INPUTS); + + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + setup.coef[slot].a0.f[i] = (vmin_d[i] - + (setup.coef[slot].dadx.f[i] * x + + setup.coef[slot].dady.f[i] * y)); + } + + /* + _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup.coef[slot].a0[i], + setup.coef[slot].dadx.f[i], + setup.coef[slot].dady.f[i]); + */ +} + + +/** + * As above, but interp setup all four vector components. + */ +static INLINE void +tri_linear_coeff4(uint slot) +{ + const vector float vmin_d = setup.vmin->data[slot]; + const vector float vmid_d = setup.vmid->data[slot]; + const vector float vmax_d = setup.vmax->data[slot]; + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + + setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + +#if 0 +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( unsigned slot, + unsigned i ) +{ + /* premultiply by 1/w: + */ + float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; + float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; + float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; + + float botda = mida - mina; + float majda = maxa - mina; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; + + /* + printf("tri persp %d,%d: %f %f %f\n", slot, i, + setup.vmin->data[slot][i], + setup.vmid->data[slot][i], + setup.vmax->data[slot][i] + ); + */ + + assert(slot < PIPE_MAX_SHADER_INPUTS); + assert(i <= 3); + + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + setup.coef[slot].a0.f[i] = (mina - + (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + + setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); +} +#endif + + +/** + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients(void) +{ +#if 1 + uint i; + + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + switch (spu.vertex_info.interp_mode[i]) { + case INTERP_NONE: + break; + case INTERP_POS: + /*tri_linear_coeff(i, 2, 3);*/ + /* XXX interp W if PERSPECTIVE... */ + tri_linear_coeff4(i); + break; + case INTERP_CONSTANT: + const_coeff(i); + break; + case INTERP_LINEAR: + tri_linear_coeff4(i); + break; + case INTERP_PERSPECTIVE: + tri_linear_coeff4(i); /* temporary */ + break; + default: + ASSERT(0); + } + } +#else + ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); + ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || + spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); + tri_linear_coeff(0, 2, 3); /* slot 0, z */ + tri_linear_coeff(1, 0, 4); /* slot 1, color */ +#endif +} + + +static void setup_tri_edges(void) +{ + float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; + float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + + float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; + float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + + setup.emaj.sy = CEILF(vmin_y); + setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); + setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; + setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + + setup.etop.sy = CEILF(vmid_y); + setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); + setup.etop.dxdy = setup.etop.dx / setup.etop.dy; + setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + + setup.ebot.sy = CEILF(vmin_y); + setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); + setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; + setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const int minx = setup.cliprect_minx; + const int maxx = setup.cliprect_maxx; + const int miny = setup.cliprect_miny; + const int maxy = setup.cliprect_maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + ASSERT((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup.span.y) { + flush_spans(); + setup.span.y = block(_y); + } + + setup.span.left[_y&1] = left; + setup.span.right[_y&1] = right; + setup.span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. + */ +boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +{ + setup.tx = tx; + setup.ty = ty; + + /* set clipping bounds to tile bounds */ + setup.cliprect_minx = tx * TILE_SIZE; + setup.cliprect_miny = ty * TILE_SIZE; + setup.cliprect_maxx = (tx + 1) * TILE_SIZE; + setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + + if (!setup_sort_vertices((struct vertex_header *) v0, + (struct vertex_header *) v1, + (struct vertex_header *) v2)) { + return FALSE; /* totally clipped */ + } + + setup_tri_coefficients(); + setup_tri_edges(); + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; + /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup.oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); + subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); + subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); + } + + flush_spans(); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h new file mode 100644 index 0000000000..aa694dd7c9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_TRI_H +#define SPU_TRI_H + + +extern boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); + + +#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c new file mode 100644 index 0000000000..ac373240c1 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -0,0 +1,165 @@ +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +//#include "tgsi_build.h" +#include "pipe/tgsi/util/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + component ); + } + + return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c new file mode 100644 index 0000000000..45e3c26c00 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,673 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include +#include + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" + +#define CACHE_NAME attribute +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#include + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME attribute +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +#define DRAW_DBG 0 + +static const qword fetch_shuffle_data[] = { + /* Shuffle used by CVT_64_FLOAT + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED + */ + { + 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, + 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED + */ + { + 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, + 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, + }, + + /* High value shuffle used by trans4x4. + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 + }, + + /* Low value shuffle used by trans4x4. + */ + { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F + } +}; + + +static INLINE void +trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, + const qword *shuffle) +{ + qword t1 = si_shufb(row0, row2, shuffle[3]); + qword t2 = si_shufb(row0, row2, shuffle[4]); + qword t3 = si_shufb(row1, row3, shuffle[3]); + qword t4 = si_shufb(row1, row3, shuffle[4]); + + out[0] = si_shufb(t1, t3, shuffle[3]); + out[1] = si_shufb(t1, t3, shuffle[4]); + out[2] = si_shufb(t2, t4, shuffle[3]); + out[3] = si_shufb(t2, t4, shuffle[4]); +} + + +/** + * Fetch between 1 and 32 bytes from an unaligned address + */ +static INLINE void +fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + qword tmp[4]; + const int shift = ea & 0x0f; + const unsigned aligned_start_ea = ea & ~0x0f; + const unsigned aligned_end_ea = (ea + size) & ~0x0f; + const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < num_entries; i++) { + dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); + } + } else { + /* Fetch data from the cache to the local buffer. + */ + for (i = 0; i < num_entries; i++) { + tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); + } + + + /* Fix the alignment of the data and write to the destination buffer. + */ + for (i = 0; i < ((size + 15) / 16); i++) { + dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift), + (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16)); + } + } +} + + +#define CVT_32_FLOAT(q, s) (*(q)) + +static INLINE qword +CVT_64_FLOAT(const qword *qw, const qword *shuffle) +{ + qword a = si_frds(qw[0]); + qword b = si_frds(si_rotqbyi(qw[0], 8)); + qword c = si_frds(qw[1]); + qword d = si_frds(si_rotqbyi(qw[1], 8)); + + qword ab = si_shufb(a, b, shuffle[0]); + qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); + + return si_or(ab, cd); +} + + +static INLINE qword +CVT_8_USCALED(const qword *qw, const qword *shuffle) +{ + return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); +} + + +static INLINE qword +CVT_16_USCALED(const qword *qw, const qword *shuffle) +{ + return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); +} + + +static INLINE qword +CVT_32_USCALED(const qword *qw, const qword *shuffle) +{ + (void) shuffle; + return si_cuflt(*qw, 0); +} + +static INLINE qword +CVT_8_SSCALED(const qword *qw, const qword *shuffle) +{ + return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); +} + + +static INLINE qword +CVT_16_SSCALED(const qword *qw, const qword *shuffle) +{ + return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); +} + + +static INLINE qword +CVT_32_SSCALED(const qword *qw, const qword *shuffle) +{ + (void) shuffle; + return si_csflt(*qw, 0); +} + + +static INLINE qword +CVT_8_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 255.0f); + return si_fm(CVT_8_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_16_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 65535.0f); + return si_fm(CVT_16_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_32_UNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); + return si_fm(CVT_32_USCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_8_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 127.0f); + return si_fm(CVT_8_SSCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_16_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 32767.0f); + return si_fm(CVT_16_SSCALED(qw, shuffle), scale); +} + + +static INLINE qword +CVT_32_SNORM(const qword *qw, const qword *shuffle) +{ + const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); + return si_fm(CVT_32_SSCALED(qw, shuffle), scale); +} + +#define SZ_4 si_il(0U) +#define SZ_3 si_fsmbi(0x000f) +#define SZ_2 si_fsmbi(0x00ff) +#define SZ_1 si_fsmbi(0x0fff) + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ +static void \ +fetch_##NAME(qword *out, const qword *in, qword defaults, \ + const qword *shuffle) \ +{ \ + qword tmp[4]; \ + \ + tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ + tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ + tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ + tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ + trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ +} + + +FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) +FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 ) + +FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 ) +FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 ) + +FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 ) +FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 ) + +FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 ) +FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 ) + +FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 ) +FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 ) + +FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 ) +FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 ) + +FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 ) +FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 ) + +FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 ) +FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 ) + +FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 ) +FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 ) + +FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 ) +FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 ) + +FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 ) +FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 ) + +FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 ) +FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 ) + +FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 ) +FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 ) + +FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 ) +FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 ) + +FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 ) + + + +static spu_fetch_func get_fetch_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case 0: + return NULL; /* not sure why this is needed */ + + default: + assert(0); + return NULL; + } +} + + +static unsigned get_vertex_size( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return 8; + case PIPE_FORMAT_R64G64_FLOAT: + return 2 * 8; + case PIPE_FORMAT_R64G64B64_FLOAT: + return 3 * 8; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return 4 * 8; + + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32_USCALED: + case PIPE_FORMAT_R32_UNORM: + case PIPE_FORMAT_R32_FLOAT: + return 4; + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + return 2 * 4; + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32B32_USCALED: + case PIPE_FORMAT_R32G32B32_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + return 3 * 4; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_USCALED: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return 4 * 4; + + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_USCALED: + return 2; + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16G16_UNORM: + return 2 * 2; + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16_UNORM: + return 3 * 2; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16A16_UNORM: + return 4 * 2; + + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8_UNORM: + return 1; + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8_UNORM: + return 2 * 1; + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8B8_UNORM: + return 3 * 1; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8G8B8A8_UNORM: + return 4 * 1; + + case 0: + return 0; /* not sure why this is needed */ + + default: + assert(0); + return 0; + } +} + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + assert(count <= 4); + +#if DRAW_DBG + printf("SPU: %s count = %u, nr_attrs = %u\n", + __FUNCTION__, count, nr_attrs); +#endif + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; + const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + unsigned i; + unsigned idx; + const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; + const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; + qword in[2 * 4]; + + + /* Fetch four attributes for four vertices. + */ + idx = 0; + for (i = 0; i < count; i++) { + const uint64_t addr = src + (elts[i] * pitch); + +#if DRAW_DBG + printf("SPU: fetching = 0x%llx\n", addr); +#endif + + fetch_unaligned(& in[idx], addr, bytes_per_entry); + idx += quads_per_entry; + } + + /* Be nice and zero out any missing vertices. + */ + (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); + + + /* Convert all 4 vertices to vectors of float. + */ + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, + fetch_shuffle_data); + } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ + unsigned i; + + + /* Invalidate the vertex cache. + */ + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + CACHELINE_CLEARVALID(i); + } + + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + draw->vertex_fetch.fetch[i] = + get_fetch_func(draw->vertex_fetch.format[i]); + draw->vertex_fetch.size[i] = + get_vertex_size(draw->vertex_fetch.format[i]); + } + + draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c new file mode 100644 index 0000000000..c1cbbb6d1e --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + * Ian Romanick + */ + +#include + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_context.h" +#include "pipe/cell/common.h" +#include "spu_main.h" + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<machine; + unsigned int j; + + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + + machine->Processor = TGSI_PROCESSOR_VERTEX; + + ASSERT_ALIGN16(draw->constants); + machine->Consts = (float (*)[4]) draw->constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + spu_vertex_fetch( draw, machine, elts, count ); + + /* run shader */ + spu_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + unsigned char buffer[sizeof(struct vertex_header) + + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + struct vertex_header *const tmpOut = + (struct vertex_header *) buffer; + const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) + + (sizeof(float) * 4 + * draw->num_vs_outputs)); + + mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, + draw->nr_planes); + tmpOut->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + tmpOut->data[0][0] = x * scale[0] + trans[0]; + tmpOut->data[0][1] = y * scale[1] + trans[1]; + tmpOut->data[0][2] = z * scale[2] + trans[2]; + tmpOut->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + } /* loop over vertices */ +} + + +static void +spu_bind_vertex_shader(struct spu_vs_context *draw, + void *uniforms, + void *planes, + unsigned nr_planes, + unsigned num_outputs + ) +{ + draw->constants = (float (*)[4]) uniforms; + + (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); + draw->nr_planes = nr_planes; + draw->num_vs_outputs = num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] + ALIGN16_ATTRIB; + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_size = + ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) + + (immediate_addr & 0x0f)); + + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, + TAG_VERTEX_BUFFER, 0, 0); + + draw->machine.Instructions = (struct tgsi_full_instruction *) + vs->shader.instructions; + draw->machine.NumInstructions = vs->shader.num_instructions; + + draw->machine.Declarations = (struct tgsi_full_declaration *) + vs->shader.declarations; + draw->machine.NumDeclarations = vs->shader.num_declarations; + + draw->vertex_fetch.nr_attrs = vs->nr_attrs; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], + sizeof(float) * 4 * vs->shader.num_immediates); + + spu_bind_vertex_shader(draw, vs->shader.uniforms, + vs->plane, vs->nr_planes, + vs->shader.num_outputs); + + for (i = 0; i < vs->num_elts; i += 4) { + const unsigned batch_size = MIN2(vs->num_elts - i, 4); + + run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); + } +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h new file mode 100644 index 0000000000..b5bf31e67d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -0,0 +1,63 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, + const qword *shuffle_data); +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count ); + +struct spu_vs_context { + struct pipe_viewport_state viewport; + + struct { + uint64_t src_ptr[PIPE_ATTRIB_MAX]; + unsigned pitch[PIPE_ATTRIB_MAX]; + unsigned size[PIPE_ATTRIB_MAX]; + enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned nr_attrs; + boolean dirty; + + spu_fetch_func fetch[PIPE_ATTRIB_MAX]; + spu_full_fetch_func fetch_func; + } vertex_fetch; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + struct spu_exec_machine machine; + const float (*constants)[4]; + + unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + if (draw->vertex_fetch.dirty) { + spu_update_vertex_fetch(draw); + draw->vertex_fetch.dirty = 0; + } + + (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h new file mode 100644 index 0000000000..ce8ad00339 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_ztest.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Zbuffer/depth test code. + */ + + +#ifndef SPU_ZTEST_H +#define SPU_ZTEST_H + + +#ifdef __SPU__ +#include +#endif + + + +/** + * Perform Z testing for a 16-bit/value Z buffer. + * + * \param zvals vector of four fragment zvalues as floats + * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this + * contains the Z values for 2 quads, 8 pixels. + * \param x x coordinate of quad (only lsbit is significant) + * \param inMask indicates which fragments in the quad are alive + * \return new mask indicating which fragments are alive after ztest + */ +static INLINE vector unsigned int +spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, + uint x, vector unsigned int inMask) +{ +#define ZERO 0x80 + vector unsigned int zvals_ui4, zbuf_ui4, mask; + + /* convert floats to uints in [0, 65535] */ + zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ + zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ + + /* XXX this conditional could be removed with a bit of work */ + if (x & 1) { + /* convert zbuffer values from ushorts to uints */ + /* gather lower four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, + ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 16, 17, 18, 19, 20, 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15})); + } + else { + /* convert zbuffer values from ushorts to uints */ + /* gather upper four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31})); + } + return mask; +#undef ZERO +} + + +/** + * As above, but Zbuffer values as 32-bit uints + */ +static INLINE vector unsigned int +spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, + vector unsigned int inMask) +{ + vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; + + /* convert floats to uints in [0, 0xffffffff] */ + zvals_ui4 = spu_convtu(zvals, 32); + /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); + + return mask; +} + + +#endif /* SPU_ZTEST_H */ -- cgit v1.2.3 From 6acd63a4980951727939c0dd545a0324965b3834 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:50:12 +0900 Subject: Code reorganization: update build. Update the Makefiles and includes for the new paths. Note that there hasn't been no separation of the Makefiles yet, and make is jumping all over the place. That will be taken care shortly. But for now, make should work. It was tested with linux and linux-dri. Linux-cell and linux-llvm might require some minor tweaks. --- configs/beos | 2 +- configs/darwin | 2 +- configs/darwin-x86ppc | 2 +- configs/default | 2 +- configs/freebsd-dri | 2 +- configs/linux-cell | 2 +- configs/linux-directfb | 2 +- configs/linux-dri | 6 +- configs/linux-dri-xcb | 4 +- configs/linux-fbdev | 2 +- configs/linux-osmesa | 2 +- configs/linux-osmesa16 | 2 +- configs/linux-osmesa16-static | 2 +- configs/linux-osmesa32 | 2 +- configs/linux-solo | 2 +- src/gallium/Makefile | 12 +-- src/gallium/Makefile.template | 7 +- src/gallium/aux/Makefile | 24 +++++ src/gallium/aux/draw/draw_private.h | 2 +- src/gallium/aux/draw/draw_vertex.c | 4 +- src/gallium/aux/draw/draw_vertex_shader.c | 4 +- src/gallium/aux/llvm/Makefile | 4 +- src/gallium/aux/llvm/gallivm.cpp | 4 +- src/gallium/aux/llvm/gallivm_cpu.cpp | 4 +- src/gallium/aux/llvm/tgsitollvm.cpp | 10 +- src/gallium/aux/pipebuffer/Makefile | 2 +- src/gallium/aux/tgsi/exec/tgsi_exec.c | 4 +- src/gallium/aux/tgsi/exec/tgsi_sse2.c | 4 +- src/gallium/aux/tgsi/util/tgsi_transform.h | 4 +- src/gallium/drivers/Makefile | 24 +++++ src/gallium/drivers/cell/ppu/Makefile | 7 +- src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 6 +- src/gallium/drivers/cell/ppu/cell_context.h | 6 +- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 2 +- src/gallium/drivers/cell/ppu/cell_flush.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_blend.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_clip.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 4 +- src/gallium/drivers/cell/ppu/cell_state_fs.c | 8 +- .../drivers/cell/ppu/cell_state_rasterizer.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_sampler.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 6 +- src/gallium/drivers/cell/spu/Makefile | 6 +- src/gallium/drivers/cell/spu/spu_exec.c | 4 +- src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 4 +- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_render.h | 2 +- src/gallium/drivers/cell/spu/spu_tile.h | 2 +- src/gallium/drivers/cell/spu/spu_util.c | 4 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 6 +- src/gallium/drivers/failover/Makefile | 2 +- src/gallium/drivers/i915simple/Makefile | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.h | 2 +- .../drivers/i915simple/i915_fpc_translate.c | 4 +- src/gallium/drivers/i915simple/i915_prim_emit.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 2 +- .../drivers/i915simple/i915_state_derived.c | 4 +- src/gallium/drivers/i915simple/i915_strings.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i965simple/Makefile | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_strings.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 2 +- src/gallium/drivers/softpipe/Makefile | 2 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 2 +- src/gallium/drivers/softpipe/sp_flush.c | 2 +- src/gallium/drivers/softpipe/sp_headers.h | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 4 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 6 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_state_clip.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 6 +- src/gallium/drivers/softpipe/sp_state_fs.c | 8 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 4 +- src/gallium/drivers/softpipe/sp_state_vertex.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/winsys/dri/Makefile | 38 +++++++ src/gallium/winsys/dri/Makefile.template | 113 +++++++++++++++++++++ src/gallium/winsys/dri/intel/Makefile | 8 +- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 2 +- .../winsys/dri/intel/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 6 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/Makefile | 16 ++- src/mesa/drivers/x11/xm_api.c | 2 +- src/mesa/drivers/x11/xm_dd.c | 2 +- src/mesa/drivers/x11/xm_surface.c | 8 +- src/mesa/drivers/x11/xm_winsys.c | 2 +- src/mesa/drivers/x11/xmesaP.h | 4 +- src/mesa/sources | 83 +++++++-------- src/mesa/state_tracker/st_atom_shader.c | 2 +- src/mesa/state_tracker/st_cache.c | 4 +- src/mesa/state_tracker/st_cache.h | 2 +- src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 6 +- src/mesa/state_tracker/st_cb_program.c | 4 +- src/mesa/state_tracker/st_cb_rasterpos.c | 4 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_context.c | 4 +- src/mesa/state_tracker/st_debug.c | 4 +- src/mesa/state_tracker/st_draw.c | 4 +- src/mesa/state_tracker/st_gen_mipmap.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +- src/mesa/state_tracker/st_program.c | 4 +- 127 files changed, 445 insertions(+), 241 deletions(-) create mode 100644 src/gallium/aux/Makefile create mode 100644 src/gallium/drivers/Makefile create mode 100644 src/gallium/winsys/dri/Makefile create mode 100644 src/gallium/winsys/dri/Makefile.template (limited to 'src/gallium/drivers/cell') diff --git a/configs/beos b/configs/beos index f07973d0c7..2b74af739d 100644 --- a/configs/beos +++ b/configs/beos @@ -86,7 +86,7 @@ else endif # Directories -SRC_DIRS = mesa glu glut/beos +SRC_DIRS = gallium mesa glu glut/beos GLU_DIRS = sgi DRIVER_DIRS = beos PROGRAM_DIRS = beos samples redbook demos tests diff --git a/configs/darwin b/configs/darwin index 7826ecc605..bba7802696 100644 --- a/configs/darwin +++ b/configs/darwin @@ -25,5 +25,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/darwin-x86ppc b/configs/darwin-x86ppc index 13172327a7..ebeb25051f 100644 --- a/configs/darwin-x86ppc +++ b/configs/darwin-x86ppc @@ -29,5 +29,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/default b/configs/default index 166205a1d3..25a87e66a1 100644 --- a/configs/default +++ b/configs/default @@ -60,7 +60,7 @@ GLW_SOURCES = GLwDrawA.c # Directories to build LIB_DIR = lib -SRC_DIRS = mesa glu glut/glx glw +SRC_DIRS = gallium mesa glu glut/glx glw GLU_DIRS = sgi DRIVER_DIRS = x11 osmesa # Which subdirs under $(TOP)/progs/ to enter: diff --git a/configs/freebsd-dri b/configs/freebsd-dri index 402883d1de..67d253b869 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -36,7 +36,7 @@ GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/X11R6/lib -lGL -lXt -lX11 # Directories -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw DRIVER_DIRS = dri PROGRAM_DIRS = WINDOW_SYSTEM=dri diff --git a/configs/linux-cell b/configs/linux-cell index 3d874491e4..fdf20deeeb 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -21,7 +21,7 @@ CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$( CXXFLAGS = $(CFLAGS) # Omitting glw here: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx MKDEP_OPTIONS = -fdepend -Y diff --git a/configs/linux-directfb b/configs/linux-directfb index 09332f4808..dff27f7850 100644 --- a/configs/linux-directfb +++ b/configs/linux-directfb @@ -22,7 +22,7 @@ ifeq ($(HAVE_X86), yes) endif # Directories -SRC_DIRS = mesa glu glut/directfb +SRC_DIRS = gallium mesa glu glut/directfb GLU_DIRS = sgi DRIVER_DIRS = directfb PROGRAM_DIRS = demos directfb diff --git a/configs/linux-dri b/configs/linux-dri index 936fce9982..e6135856fc 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -54,10 +54,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif @@ -66,4 +66,4 @@ WINDOW_SYSTEM=dri # gamma are missing because they have not been converted to use the new # interface. -DRI_DIRS = intel_winsys +DRI_DIRS = intel diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index aa292a13ec..ea4bdf1864 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -53,10 +53,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif diff --git a/configs/linux-fbdev b/configs/linux-fbdev index e36d20a702..1ddccb3f52 100644 --- a/configs/linux-fbdev +++ b/configs/linux-fbdev @@ -6,7 +6,7 @@ CONFIG_NAME = linux-fbdev CFLAGS = -O3 -ffast-math -ansi -pedantic -fPIC -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DPTHREADS -DUSE_GLFBDEV_DRIVER -SRC_DIRS = mesa glu glut/fbdev +SRC_DIRS = gallium mesa glu glut/fbdev DRIVER_DIRS = fbdev osmesa PROGRAM_DIRS = fbdev demos redbook samples diff --git a/configs/linux-osmesa b/configs/linux-osmesa index cc1fbbd109..0382a19553 100644 --- a/configs/linux-osmesa +++ b/configs/linux-osmesa @@ -14,7 +14,7 @@ CXXFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOUR # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = osdemos diff --git a/configs/linux-osmesa16 b/configs/linux-osmesa16 index 1fb0186d31..9a527592f1 100644 --- a/configs/linux-osmesa16 +++ b/configs/linux-osmesa16 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa16.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa16-static b/configs/linux-osmesa16-static index 6645504478..1e6380b02e 100644 --- a/configs/linux-osmesa16-static +++ b/configs/linux-osmesa16-static @@ -18,7 +18,7 @@ OSMESA_LIB_NAME = libOSMesa16.a # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa32 b/configs/linux-osmesa32 index a1e5a358d6..f0ef1831b0 100644 --- a/configs/linux-osmesa32 +++ b/configs/linux-osmesa32 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa32.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-solo b/configs/linux-solo index 220fe58b9a..d49b972228 100644 --- a/configs/linux-solo +++ b/configs/linux-solo @@ -43,7 +43,7 @@ GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -lm APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm -lpthread # Directories -SRC_DIRS = glx/mini mesa glu glut/mini +SRC_DIRS = glx/mini gallium mesa glu glut/mini DRIVER_DIRS = dri PROGRAM_DIRS = miniglx diff --git a/src/gallium/Makefile b/src/gallium/Makefile index d880d090c1..a13b9a52d3 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -1,16 +1,8 @@ -TOP = ../../.. +TOP = ../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) $(LLVM_DIR) +SUBDIRS = aux drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 8e84f8eb2d..0717ed8dd2 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -15,7 +15,10 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/include/pipe \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ $(DRIVER_INCLUDES) @@ -38,7 +41,7 @@ INCLUDES = \ default: depend symlinks $(LIBNAME) -$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/mesa/pipe/Makefile.template +$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) diff --git a/src/gallium/aux/Makefile b/src/gallium/aux/Makefile new file mode 100644 index 0000000000..da68498aa1 --- /dev/null +++ b/src/gallium/aux/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_DIR = llvm +endif + +SUBDIRS = pipebuffer $(LLVM_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/aux/draw/draw_private.h b/src/gallium/aux/draw/draw_private.h index b17eaaed65..3d09aef87c 100644 --- a/src/gallium/aux/draw/draw_private.h +++ b/src/gallium/aux/draw/draw_private.h @@ -45,7 +45,7 @@ #include "pipe/p_defines.h" #include "x86/rtasm/x86sse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" struct gallivm_prog; diff --git a/src/gallium/aux/draw/draw_vertex.c b/src/gallium/aux/draw/draw_vertex.c index 2d6592150f..daf1ef4b80 100644 --- a/src/gallium/aux/draw/draw_vertex.c +++ b/src/gallium/aux/draw/draw_vertex.c @@ -34,8 +34,8 @@ */ -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/aux/draw/draw_vertex_shader.c b/src/gallium/aux/draw/draw_vertex_shader.c index c824c1407e..377ecbb931 100644 --- a/src/gallium/aux/draw/draw_vertex_shader.c +++ b/src/gallium/aux/draw/draw_vertex_shader.c @@ -34,13 +34,13 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #if defined(__i386__) || defined(__386__) -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "draw_private.h" #include "draw_context.h" #include "x86/rtasm/x86sse.h" -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #define DBG_VS 0 diff --git a/src/gallium/aux/llvm/Makefile b/src/gallium/aux/llvm/Makefile index 9c6e16d86b..e6ac399d08 100644 --- a/src/gallium/aux/llvm/Makefile +++ b/src/gallium/aux/llvm/Makefile @@ -30,7 +30,9 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/aux/llvm/gallivm.cpp b/src/gallium/aux/llvm/gallivm.cpp index da0105c2c9..d14bb3b99a 100644 --- a/src/gallium/aux/llvm/gallivm.cpp +++ b/src/gallium/aux/llvm/gallivm.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include #include diff --git a/src/gallium/aux/llvm/gallivm_cpu.cpp b/src/gallium/aux/llvm/gallivm_cpu.cpp index dc4d92a72a..8f9830d0b1 100644 --- a/src/gallium/aux/llvm/gallivm_cpu.cpp +++ b/src/gallium/aux/llvm/gallivm_cpu.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include #include diff --git a/src/gallium/aux/llvm/tgsitollvm.cpp b/src/gallium/aux/llvm/tgsitollvm.cpp index 0de595e678..2cb4acce32 100644 --- a/src/gallium/aux/llvm/tgsitollvm.cpp +++ b/src/gallium/aux/llvm/tgsitollvm.cpp @@ -10,11 +10,11 @@ #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" #include diff --git a/src/gallium/aux/pipebuffer/Makefile b/src/gallium/aux/pipebuffer/Makefile index 75764a9a18..588629e870 100644 --- a/src/gallium/aux/pipebuffer/Makefile +++ b/src/gallium/aux/pipebuffer/Makefile @@ -17,7 +17,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/aux/tgsi/exec/tgsi_exec.c b/src/gallium/aux/tgsi/exec/tgsi_exec.c index 37e6007068..a8f64c2287 100644 --- a/src/gallium/aux/tgsi/exec/tgsi_exec.c +++ b/src/gallium/aux/tgsi/exec/tgsi_exec.c @@ -54,8 +54,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #define TILE_TOP_LEFT 0 diff --git a/src/gallium/aux/tgsi/exec/tgsi_sse2.c b/src/gallium/aux/tgsi/exec/tgsi_sse2.c index 1e56e4afb6..593464db3e 100755 --- a/src/gallium/aux/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/aux/tgsi/exec/tgsi_sse2.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #include "tgsi_sse2.h" diff --git a/src/gallium/aux/tgsi/util/tgsi_transform.h b/src/gallium/aux/tgsi/util/tgsi_transform.h index 365d8c298c..fcf85d603b 100644 --- a/src/gallium/aux/tgsi/util/tgsi_transform.h +++ b/src/gallium/aux/tgsi/util/tgsi_transform.h @@ -31,8 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile new file mode 100644 index 0000000000..c0345a9cb5 --- /dev/null +++ b/src/gallium/drivers/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-cell) +CELL_DIR = cell +endif + +SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 50060f5cd3..011863c11e 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -40,8 +40,11 @@ SOURCES = \ OBJECTS = $(SOURCES:.c=.o) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa - +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 07b908eec5..e588a30d5b 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,7 +35,7 @@ #include #include "pipe/p_inlines.h" #include "pipe/p_util.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_batch.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index bbe1fd7a11..e1eb22f468 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,9 +37,9 @@ #include "pipe/p_format.h" #include "pipe/p_util.h" #include "pipe/p_winsys.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3b63419b5e..6196c0c72f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -32,10 +32,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" #include "cell_winsys.h" -#include "pipe/cell/common.h" +#include "cell/common.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 717cd8370f..f12613649b 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -39,7 +39,7 @@ #include "cell_draw_arrays.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index f62bc4650c..20f27531fc 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -31,7 +31,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_render.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index 4ab277a4b2..b663b37622 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -34,7 +34,7 @@ #include "cell_render.h" #include "cell_spu.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" struct render_stage { diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 7c83a47e57..419e74dc40 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -31,7 +31,7 @@ #include "cell_spu.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/cell/common.h" +#include "cell/common.h" /* diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 19eff94f96..137f26612e 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -31,7 +31,7 @@ #include #include -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c index 4fc60548c8..b6d6d71f0c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_blend.c +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -29,7 +29,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c index 4f43665941..0482f87e88 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_clip.c +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -30,7 +30,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void cell_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 56daf5dfde..0c46829258 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "cell_context.h" #include "cell_batch.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c index 3f46a87d18..b2ed699a5b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -29,12 +29,12 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #if 0 #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c index d8128ece54..7eca5b5765 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c index ade6cc8338..a33421a4ad 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_sampler.c +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -30,7 +30,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 0f01e920f9..563831b62d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -32,7 +32,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index fca93e4742..a35db0ef99 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e9fafe492e..cc727ff4ed 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -36,7 +36,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" /** Allow vertex data to be inlined after RENDER command */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 80dd500b34..0ba4506505 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -38,9 +38,9 @@ #include "cell_spu.h" #include "cell_batch.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** * Run the vertex shader on all vertices in the vertex queue. diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index f202971d73..7aa947299e 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -31,7 +31,11 @@ SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index e51008b9b3..109540b1f7 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -67,8 +67,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index b4c7661ef6..3e17c490d2 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -29,7 +29,7 @@ #define SPU_EXEC_H #include "pipe/p_compiler.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #if defined __cplusplus extern "C" { diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index e375197fe6..1e7243b863 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,7 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 1710a17512..5c95d112ac 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -31,8 +31,8 @@ #include -#include "pipe/cell/common.h" -#include "pipe/draw/draw_vertex.h" +#include "cell/common.h" +#include "draw/draw_vertex.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 932fb500b3..20e77aa2e6 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -34,7 +34,7 @@ #include "spu_render.h" #include "spu_tri.h" #include "spu_tile.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h index fbcdc5ec31..493434f087 100644 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -29,7 +29,7 @@ #ifndef SPU_RENDER_H #define SPU_RENDER_H -#include "pipe/cell/common.h" +#include "cell/common.h" extern void cmd_render(const struct cell_command_render *render, uint *pos_incr); diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index e53340a55a..3105b848fd 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -32,7 +32,7 @@ #include #include #include "spu_main.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index ac373240c1..ea4274a0a7 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,8 +1,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" //#include "tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_util.h" unsigned tgsi_util_get_src_register_swizzle( diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index c1cbbb6d1e..3f5bf41aa2 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -39,9 +39,9 @@ #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" #include "spu_exec.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cell/common.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" #include "spu_main.h" static INLINE unsigned diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 72d0895c74..14389bd055 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -15,7 +15,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 2f91de3afc..ee22ba86f9 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -32,7 +32,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 497623a700..7f71f8fd4f 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -32,7 +32,7 @@ #include "i915_texture.h" #include "i915_reg.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index b4ea63c3e7..2d876925b2 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #define I915_TEX_UNITS 8 diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 868f0c7e04..6c1524c768 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,9 +33,9 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index c4a706c37d..44c4325936 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" #include "pipe/p_util.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e069773fd4..c5bf6174f6 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -38,7 +38,7 @@ */ -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index abd5571b88..294e6fad03 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -29,7 +29,7 @@ */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 653983e4a9..4767584fc6 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c index c713bf7208..301fedea19 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -70,7 +70,7 @@ static const char *i915_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i915 (chipset: %s)", chipset); + sprintf(buffer, "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index de0cc5fe06..17fd27895a 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -33,7 +33,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 48c00ab50b..1dec1f9749 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -61,6 +61,6 @@ ASM_SOURCES = DRIVER_DEFINES = -I. -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 431b45466a..a762a870fe 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -3,7 +3,7 @@ #include "brw_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 95dfce88e4..f746d1cc57 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -33,7 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c index 29a41ed1e9..3d9c50961f 100644 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -59,7 +59,7 @@ static const char *brw_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i965 (chipset: %s)", chipset); + sprintf(buffer, "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 518845e4b2..376a42b1a6 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -32,7 +32,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 98915ba101..05df4860ed 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "brw_vs.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" struct brw_prog_info { unsigned num_temps; diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index b45a333a2e..97418a52e7 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index d95645d108..44f946ea74 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 31438a882e..2304ea4246 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -44,7 +44,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index cea6b90104..5e98f190bb 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index aff8c2cc5d..8c79cb3ce4 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 71a303a8b5..2049afda34 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,7 +38,7 @@ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index ced0d5d098..2cbd0d7cab 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_flush.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 0ae31d8796..9cf8222133 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -31,7 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #define PRIM_POINT 1 #define PRIM_LINE 2 diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 2772048661..d73521ccbe 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -38,8 +38,8 @@ #include "sp_quad.h" #include "sp_state.h" #include "sp_prim_setup.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f71fdb6a9..69bea8a8f5 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -39,9 +39,9 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 3316858413..b4c01a7ea8 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -42,7 +42,7 @@ #include "x86/rtasm/x86sse.h" #ifdef MESA_LLVM -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #endif #include "sp_context.h" diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 6a8a43aeda..adf9ccf64c 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index 08c5f06d05..c797c0dd3b 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -29,7 +29,7 @@ */ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void softpipe_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 372597869f..9d8fd8b750 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -27,9 +27,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 0b814fc284..1e3cadd43d 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -32,11 +32,11 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" void * diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 53755099dd..98e04352db 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index ea348c7e95..460adccec4 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -31,14 +31,14 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 09ff540ccf..f01a10de3b 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -33,7 +33,7 @@ #include "sp_state.h" #include "sp_surface.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 8802ced187..653449c4f1 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 325bdb86da..2f82fd6abe 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" /* diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1597361b82..dde3fabc81 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -34,7 +34,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_tile_cache.h" diff --git a/src/gallium/winsys/dri/Makefile b/src/gallium/winsys/dri/Makefile new file mode 100644 index 0000000000..f466ce6c3c --- /dev/null +++ b/src/gallium/winsys/dri/Makefile @@ -0,0 +1,38 @@ +# src/mesa/drivers/dri/Makefile + +TOP = ../../../.. + +include $(TOP)/configs/current + + + +default: $(TOP)/$(LIB_DIR) subdirs + + +$(TOP)/$(LIB_DIR): + -mkdir $(TOP)/$(LIB_DIR) + + +subdirs: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +install: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) install) || exit 1 ; \ + fi \ + done + + +clean: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) clean) ; \ + fi \ + done + -rm -f common/*.o diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template new file mode 100644 index 0000000000..b96305c094 --- /dev/null +++ b/src/gallium/winsys/dri/Makefile.template @@ -0,0 +1,113 @@ +# -*-makefile-*- + +MESA_MODULES = $(TOP)/src/mesa/libmesa.a + +COMMON_GALLIUM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/utils.c \ + $(TOP)/src/mesa/drivers/dri/common/vblank.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_util.c \ + $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + $(TOP)/src/mesa/drivers/common/driverfuncs.c \ + $(TOP)/src/mesa/drivers/dri/common/texmem.c \ + $(TOP)/src/mesa/drivers/dri/common/drirenderbuffer.c + +COMMON_BM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/dri_bufmgr.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_drmpool.c + + +ifeq ($(WINDOW_SYSTEM),dri) +WINOBJ= +WINLIB= +INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) + +else +# miniglx +WINOBJ= +WINLIB=-L$(MESA)/src/glx/mini +MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini +INCLUDES = $(MINIGLX_INCLUDES) \ + $(SHARED_INCLUDES) \ + $(PCIACCESS_CFLAGS) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(MINIGLX_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) +endif + + +### Include directories +SHARED_INCLUDES = \ + -I. \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -Iserver \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/math \ + -I$(TOP)/src/mesa/transform \ + -I$(TOP)/src/mesa/shader \ + -I$(TOP)/src/mesa/swrast \ + -I$(TOP)/src/mesa/swrast_setup \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/egl/drivers/dri \ + $(LIBDRM_CFLAGS) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) + + +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + + +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) + + +depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +install: $(LIBNAME) + $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + + +include depend diff --git a/src/gallium/winsys/dri/intel/Makefile b/src/gallium/winsys/dri/intel/Makefile index 9ae0f01325..40654bb2ac 100644 --- a/src/gallium/winsys/dri/intel/Makefile +++ b/src/gallium/winsys/dri/intel/Makefile @@ -7,8 +7,8 @@ LIBNAME = i915tex_dri.so MINIGLX_SOURCES = server/intel_dri.c PIPE_DRIVERS = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i915simple/libi915simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a DRIVER_SOURCES = \ intel_winsys_pipe.c \ @@ -28,11 +28,11 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ +DRIVER_DEFINES = -I$(TOP)/src/mesa/drivers/dri/intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") include ../Makefile.template -intel_tex_layout.o: ../intel/intel_tex_layout.c +intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c symlinks: diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 1ba6a9e1b2..0ed3890e93 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -39,7 +39,7 @@ #include "intel_winsys.h" #include "pipe/p_util.h" -#include "pipe/i915simple/i915_winsys.h" +#include "i915simple/i915_winsys.h" struct intel_i915_winsys { diff --git a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c index cec3437831..9e483bdc9f 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c @@ -34,7 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_format.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" struct intel_softpipe_winsys { diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index c3cd22eea3..8da596d419 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -41,11 +41,11 @@ #include "pipe/p_context.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL -#include "pipe/cell/ppu/cell_context.h" -#include "pipe/cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_winsys.h" #else #define TILE_SIZE 32 /* avoid compilation errors */ #endif diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index bf41570257..dbfd37bda2 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/i965simple/brw_winsys.h" +#include "i965simple/brw_winsys.h" #include "brw_aub.h" #include "xm_winsys_aub.h" diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 720f1b2e02..561608fedd 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,16 +12,16 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) PIPE_LIB = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i965simple/libi965simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i965simple/libi965simple.a ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/mesa/pipe/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/mesa/pipe/cell/spu/g3d_spu.a +CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a +CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/mesa/pipe/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a endif .SUFFIXES : .cpp @@ -71,7 +71,7 @@ libmesa.a: $(SOLO_OBJECTS) fi linux-solo: depend subdirs libmesa.a - cd drivers/dri ; $(MAKE) + cd $(TOP)/src/gallium/winsys/dri ; $(MAKE) ##################################################################### @@ -165,7 +165,6 @@ depend: $(ALL_SOURCES) subdirs: @ (cd x86 ; $(MAKE)) @ (cd x86-64 ; $(MAKE)) - (cd pipe ; $(MAKE)) install: default $(INSTALL) -d $(INSTALL_DIR)/include/GL @@ -178,7 +177,7 @@ install: default $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \ fi @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - cd drivers/dri ; $(MAKE) install ; \ + cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \ fi ## NOT INSTALLED YET: @@ -198,7 +197,6 @@ clean: (cd drivers/dri && $(MAKE) clean) (cd x86 && $(MAKE) clean) (cd x86-64 && $(MAKE) clean) - (cd pipe ; $(MAKE) clean ) include depend diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 08c98eab48..18b033666f 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -85,7 +85,7 @@ #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "pipe/p_defines.h" /** diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 8ae243ae66..34287effe1 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -53,7 +53,7 @@ #include "tnl/tnl.h" #include "tnl/t_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" #include "state_tracker/st_draw.h" diff --git a/src/mesa/drivers/x11/xm_surface.c b/src/mesa/drivers/x11/xm_surface.c index 5533158ece..81616b92d9 100644 --- a/src/mesa/drivers/x11/xm_surface.c +++ b/src/mesa/drivers/x11/xm_surface.c @@ -45,10 +45,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_clear.h" -#include "pipe/softpipe/sp_tile_cache.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_clear.h" +#include "softpipe/sp_tile_cache.h" +#include "softpipe/sp_surface.h" #include "state_tracker/st_context.h" diff --git a/src/mesa/drivers/x11/xm_winsys.c b/src/mesa/drivers/x11/xm_winsys.c index a690df2772..2edc697693 100644 --- a/src/mesa/drivers/x11/xm_winsys.c +++ b/src/mesa/drivers/x11/xm_winsys.c @@ -38,7 +38,7 @@ #include "main/macros.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" /** diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 4709d63394..fd2dfcd79a 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -37,8 +37,8 @@ #include "xm_image.h" #endif #include "state_tracker/st_cb_fbo.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_surface.h" extern _glthread_Mutex _xmesa_lock; diff --git a/src/mesa/sources b/src/mesa/sources index 1165425183..2d07738210 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - pipe/draw/draw_clip.c \ - pipe/draw/draw_context.c\ - pipe/draw/draw_cull.c \ - pipe/draw/draw_debug.c \ - pipe/draw/draw_flatshade.c \ - pipe/draw/draw_offset.c \ - pipe/draw/draw_prim.c \ - pipe/draw/draw_stipple.c \ - pipe/draw/draw_twoside.c \ - pipe/draw/draw_unfilled.c \ - pipe/draw/draw_validate.c \ - pipe/draw/draw_vbuf.c \ - pipe/draw/draw_vertex.c \ - pipe/draw/draw_vertex_cache.c \ - pipe/draw/draw_vertex_fetch.c \ - pipe/draw/draw_vertex_shader.c \ - pipe/draw/draw_vf.c \ - pipe/draw/draw_vf_generic.c \ - pipe/draw/draw_vf_sse.c \ - pipe/draw/draw_wide_prims.c + $(TOP)/src/gallium/aux/draw/draw_clip.c \ + $(TOP)/src/gallium/aux/draw/draw_context.c\ + $(TOP)/src/gallium/aux/draw/draw_cull.c \ + $(TOP)/src/gallium/aux/draw/draw_debug.c \ + $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ + $(TOP)/src/gallium/aux/draw/draw_offset.c \ + $(TOP)/src/gallium/aux/draw/draw_prim.c \ + $(TOP)/src/gallium/aux/draw/draw_stipple.c \ + $(TOP)/src/gallium/aux/draw/draw_twoside.c \ + $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ + $(TOP)/src/gallium/aux/draw/draw_validate.c \ + $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/aux/draw/draw_vf.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/aux/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - pipe/tgsi/exec/tgsi_exec.c \ - pipe/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - pipe/tgsi/util/tgsi_build.c \ - pipe/tgsi/util/tgsi_dump.c \ - pipe/tgsi/util/tgsi_parse.c \ - pipe/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - pipe/cso_cache/cso_hash.c \ - pipe/cso_cache/cso_cache.c + $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/aux/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - pipe/util/p_debug.c \ - pipe/util/p_tile.c \ - pipe/util/p_util.c + $(TOP)/src/gallium/aux/util/p_debug.c \ + $(TOP)/src/gallium/aux/util/p_tile.c \ + $(TOP)/src/gallium/aux/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -331,13 +331,13 @@ __COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c X11_DRIVER_SOURCES = \ - pipe/xlib/glxapi.c \ - pipe/xlib/fakeglx.c \ - pipe/xlib/xfonts.c \ - pipe/xlib/xm_api.c \ - pipe/xlib/xm_winsys.c \ - pipe/xlib/xm_winsys_aub.c \ - pipe/xlib/brw_aub.c + $(TOP)/src/gallium/winsys/xlib/glxapi.c \ + $(TOP)/src/gallium/winsys/xlib/fakeglx.c \ + $(TOP)/src/gallium/winsys/xlib/xfonts.c \ + $(TOP)/src/gallium/winsys/xlib/xm_api.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c \ + $(TOP)/src/gallium/winsys/xlib/brw_aub.c OSMESA_DRIVER_SOURCES = \ drivers/osmesa/osmesa.c @@ -425,7 +425,10 @@ FBDEV_DRIVER_OBJECTS = $(FBDEV_DRIVER_SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/aux OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -435,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/src/mesa/pipe/tgsi + -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 2c6ec8421b..b67b620eaa 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_cache.h" diff --git a/src/mesa/state_tracker/st_cache.c b/src/mesa/state_tracker/st_cache.c index e0965b217a..2979e7fae5 100644 --- a/src/mesa/state_tracker/st_cache.c +++ b/src/mesa/state_tracker/st_cache.c @@ -36,8 +36,8 @@ #include "pipe/p_state.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/cso_cache/cso_hash.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" /* Those function will either find the state of the given template diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h index e0c176b0ff..b81de316ec 100644 --- a/src/mesa/state_tracker/st_cache.h +++ b/src/mesa/state_tracker/st_cache.h @@ -33,7 +33,7 @@ #ifndef ST_CACHE_H #define ST_CACHE_H -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" struct pipe_blend_state; struct pipe_sampler_state; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 3a3bf9016d..663c4f205d 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f13199a3c0..e2d4e06da1 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -56,7 +56,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 31744151f1..5315294c07 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -53,10 +53,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index af3ee65504..61d4f4c41c 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -39,8 +39,8 @@ #include "shader/programopt.h" #include "shader/shader_api.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" +#include "cso_cache/cso_cache.h" +#include "draw/draw_context.h" #include "st_context.h" #include "st_program.h" diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 7e347c4893..5b0eb6022b 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -44,8 +44,8 @@ #include "st_draw.h" #include "st_cb_rasterpos.h" #include "st_draw.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "shader/prog_instruction.h" #include "vbo/vbo.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 868c5f3c5f..c89c74229e 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "st_context.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 91a40288cc..03dbb30b0f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -47,7 +47,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define DBG if (0) printf diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bf4618bed8..09e389f9dc 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -54,8 +54,8 @@ #include "pipe/p_context.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cso_cache/cso_cache.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_cache.h" /** diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 57450e52bf..5888bcb98a 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -31,9 +31,9 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_debug.h" diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index ae9f5c8b11..1c0fa8c6aa 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -47,8 +47,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" static GLuint double_types[4] = { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 459941cca8..6c09b86033 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -37,7 +37,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 325aa20173..97206752af 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -32,9 +32,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_util.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index c8297baded..dc992ee9c2 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -38,8 +38,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "draw/draw_context.h" +#include "tgsi/util/tgsi_dump.h" #include "st_context.h" #include "st_cache.h" -- cgit v1.2.3 From 66f22aa3bf7fa546e946b45156aa578e202982c9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 20:11:40 +0900 Subject: Code reorganization: s/aux/auxiliary/ -- update build. --- src/gallium/Makefile | 2 +- src/gallium/Makefile.template | 2 +- src/gallium/auxiliary/llvm/Makefile | 2 +- src/gallium/drivers/cell/ppu/Makefile | 2 +- src/gallium/drivers/cell/spu/Makefile | 2 +- src/gallium/winsys/dri/Makefile.template | 2 +- src/mesa/Makefile | 2 +- src/mesa/sources | 66 ++++++++++++++++---------------- 8 files changed, 40 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers/cell') diff --git a/src/gallium/Makefile b/src/gallium/Makefile index a13b9a52d3..89e068a449 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -2,7 +2,7 @@ TOP = ../.. include $(TOP)/configs/current -SUBDIRS = aux drivers +SUBDIRS = auxiliary drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 0717ed8dd2..83b25f9b47 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -17,7 +17,7 @@ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/include/pipe \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/llvm/Makefile index e6ac399d08..e0abf860c1 100644 --- a/src/gallium/auxiliary/llvm/Makefile +++ b/src/gallium/auxiliary/llvm/Makefile @@ -31,7 +31,7 @@ OBJECTS = $(C_SOURCES:.c=.o) \ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/drivers - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 011863c11e..a4c3f29e8a 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -43,7 +43,7 @@ OBJECTS = $(SOURCES:.c=.o) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 7aa947299e..30ef2450ec 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -34,7 +34,7 @@ SPU_ASM_OUT = $(SOURCES:.c=.s) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index b96305c094..2a261ed669 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -49,7 +49,7 @@ SHARED_INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/include/GL/internal \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main \ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 561608fedd..c8cb2b592f 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -21,7 +21,7 @@ CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a endif .SUFFIXES : .cpp diff --git a/src/mesa/sources b/src/mesa/sources index 2d07738210..cecd8a830f 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - $(TOP)/src/gallium/aux/draw/draw_clip.c \ - $(TOP)/src/gallium/aux/draw/draw_context.c\ - $(TOP)/src/gallium/aux/draw/draw_cull.c \ - $(TOP)/src/gallium/aux/draw/draw_debug.c \ - $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ - $(TOP)/src/gallium/aux/draw/draw_offset.c \ - $(TOP)/src/gallium/aux/draw/draw_prim.c \ - $(TOP)/src/gallium/aux/draw/draw_stipple.c \ - $(TOP)/src/gallium/aux/draw/draw_twoside.c \ - $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ - $(TOP)/src/gallium/aux/draw/draw_validate.c \ - $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ - $(TOP)/src/gallium/aux/draw/draw_vf.c \ - $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ - $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ - $(TOP)/src/gallium/aux/draw/draw_wide_prims.c + $(TOP)/src/gallium/auxiliary/draw/draw_clip.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_context.c\ + $(TOP)/src/gallium/auxiliary/draw/draw_cull.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_debug.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_flatshade.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_offset.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_prim.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_stipple.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_twoside.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_unfilled.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_validate.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vbuf.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ - $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ - $(TOP)/src/gallium/aux/cso_cache/cso_cache.c + $(TOP)/src/gallium/auxiliary/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/auxiliary/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - $(TOP)/src/gallium/aux/util/p_debug.c \ - $(TOP)/src/gallium/aux/util/p_tile.c \ - $(TOP)/src/gallium/aux/util/p_util.c + $(TOP)/src/gallium/auxiliary/util/p_debug.c \ + $(TOP)/src/gallium/auxiliary/util/p_tile.c \ + $(TOP)/src/gallium/auxiliary/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -428,7 +428,7 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa/main \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/aux + -I$(TOP)/src/gallium/auxiliary OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -438,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi + -I$(TOP)/s$(TOP)/src/gallium/auxiliary/tgsi -- cgit v1.2.3 From 397b81bd1c7984b1667af7ef954e053263a7a661 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 15 Feb 2008 09:43:13 -0800 Subject: Move cell_vertex_fetch.c for recent code reorg. --- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 392 +++++++++++++++++++++++ src/mesa/pipe/cell/ppu/cell_vertex_fetch.c | 392 ----------------------- 2 files changed, 392 insertions(+), 392 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_vertex_fetch.c delete mode 100644 src/mesa/pipe/cell/ppu/cell_vertex_fetch.c (limited to 'src/gallium/drivers/cell') diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c new file mode 100644 index 0000000000..f2432f4ff5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -0,0 +1,392 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" + +#include "pipe/cell/ppu/cell_context.h" +#include "ppc/rtasm/spe_asm.h" + +typedef uint64_t register_mask; + +int allocate_available_register(register_mask *m) +{ + unsigned i; + for (i = 0; i < 64; i++) { + const uint64_t mask = (1ULL << i); + + if ((m[0] & mask) != 0) { + m[0] &= ~mask; + return i; + } + } + + return -1; +} + + +int allocate_register(register_mask *m, unsigned reg) +{ + assert((m[0] & (1ULL << reg)) != 0); + + m[0] &= ~(1ULL << reg); + return reg; +} + + +void release_register(register_mask *m, unsigned reg) +{ + assert((m[0] & (1ULL << reg)) == 0); + + m[0] |= (1ULL << reg); +} + + +/** + * Emit a 4x4 matrix transpose operation + * + * \param p Function that the transpose operation is to be appended to + * \param m Live register mask + * \param row0 Register containing row 0 of the source matrix + * \param row1 Register containing row 1 of the source matrix + * \param row2 Register containing row 2 of the source matrix + * \param row3 Register containing row 3 of the source matrix + * \param dest_ptr Register containing the address of the destination matrix + * \param shuf_ptr Register containing the address of the shuffled data + * \param count Number of colums to actually be written to the destination + * + * \note + * This function assumes that the registers named by \c row0, \c row1, + * \c row2, and \c row3 are scratch and can be modified by the generated code. + * Furthermore, these registers will be released, via calls to + * \c release_register, by this function. + * + * \note + * This function requires that four temporary are available on entry. + */ +static void +emit_matrix_transpose(struct spe_function *p, register_mask *m, + unsigned row0, unsigned row1, unsigned row2, + unsigned row3, unsigned dest_ptr, + unsigned shuf_ptr, unsigned count) +{ + int shuf_hi = allocate_available_register(m); + int shuf_lo = allocate_available_register(m); + int t1 = allocate_available_register(m); + int t2 = allocate_available_register(m); + int t3; + int t4; + int col0; + int col1; + int col2; + int col3; + + + spe_lqd(p, shuf_hi, shuf_ptr, 3); + spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_shufb(p, t1, row0, row2, shuf_hi); + spe_shufb(p, t2, row0, row2, shuf_lo); + + + /* row0 and row2 are now no longer needed. Re-use those registers as + * temporaries. + */ + t3 = row0; + t4 = row2; + + spe_shufb(p, t3, row1, row3, shuf_hi); + spe_shufb(p, t4, row1, row3, shuf_lo); + + + /* row1 and row3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col0 = row1; + col1 = row3; + + spe_shufb(p, col0, t1, t3, shuf_hi); + if (count > 1) { + spe_shufb(p, col1, t1, t3, shuf_lo); + } + + /* t1 and t3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col2 = t1; + col3 = t3; + + if (count > 2) { + spe_shufb(p, col2, t2, t4, shuf_hi); + } + + if (count > 3) { + spe_shufb(p, col3, t2, t4, shuf_lo); + } + + + /* Store the results. Remember that the stqd instruction is encoded using + * the qword offset (stand-alone assemblers to the byte-offset to + * qword-offset conversion for you), so the byte-offset needs be divided by + * 16. + */ + switch (count) { + case 4: + spe_stqd(p, col3, dest_ptr, 3); + case 3: + spe_stqd(p, col2, dest_ptr, 2); + case 2: + spe_stqd(p, col1, dest_ptr, 1); + case 1: + spe_stqd(p, col0, dest_ptr, 0); + } + + + /* Release all of the temporary registers used. + */ + release_register(m, col0); + release_register(m, col1); + release_register(m, col2); + release_register(m, col3); + release_register(m, shuf_hi); + release_register(m, shuf_lo); + release_register(m, t2); + release_register(m, t4); +} + + +static void +emit_fetch(struct spe_function *p, register_mask *m, + unsigned in_ptr, unsigned *offset, + unsigned out_ptr, unsigned shuf_ptr, + enum pipe_format format) +{ + const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); + const unsigned type = pf_type(format); + const unsigned bytes = pf_size_x(format); + + int v0 = allocate_available_register(m); + int v1 = allocate_available_register(m); + int v2 = allocate_available_register(m); + int v3 = allocate_available_register(m); + int tmp = allocate_available_register(m); + int float_zero = -1; + int float_one = -1; + float scale_signed = 0.0; + float scale_unsigned = 0.0; + + spe_lqd(p, v0, in_ptr, 0 + offset[0]); + spe_lqd(p, v1, in_ptr, 1 + offset[0]); + spe_lqd(p, v2, in_ptr, 2 + offset[0]); + spe_lqd(p, v3, in_ptr, 3 + offset[0]); + offset[0] += 4; + + switch (bytes) { + case 1: + scale_signed = 1.0f / 127.0f; + scale_unsigned = 1.0f / 255.0f; + spe_lqd(p, tmp, shuf_ptr, 1); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 2: + scale_signed = 1.0f / 32767.0f; + scale_unsigned = 1.0f / 65535.0f; + spe_lqd(p, tmp, shuf_ptr, 2); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 4: + scale_signed = 1.0f / 2147483647.0f; + scale_unsigned = 1.0f / 4294967295.0f; + break; + default: + assert(0); + break; + } + + switch (type) { + case PIPE_FORMAT_TYPE_FLOAT: + break; + case PIPE_FORMAT_TYPE_UNORM: + spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); + spe_cuflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_SNORM: + spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); + spe_csflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_USCALED: + spe_cuflt(p, v0, v0, 0); + break; + case PIPE_FORMAT_TYPE_SSCALED: + spe_csflt(p, v0, v0, 0); + break; + } + + + if (count < 4) { + float_one = allocate_available_register(m); + spe_il(p, float_one, 1); + spe_cuflt(p, float_one, float_one, 0); + + if (count < 3) { + float_zero = allocate_available_register(m); + spe_il(p, float_zero, 0); + } + } + + release_register(m, tmp); + + emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + + switch (count) { + case 1: + spe_stqd(p, float_zero, out_ptr, 1); + case 2: + spe_stqd(p, float_zero, out_ptr, 2); + case 3: + spe_stqd(p, float_one, out_ptr, 3); + } + + if (float_zero != -1) { + release_register(m, float_zero); + } + + if (float_one != -1) { + release_register(m, float_one); + } +} + + +void cell_update_vertex_fetch(struct draw_context *draw) +{ + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + register_mask m = ~0; + struct spe_function *p = &cell->attrib_fetch; + unsigned function_index[PIPE_ATTRIB_MAX]; + unsigned unique_attr_formats; + int out_ptr; + int in_ptr; + int shuf_ptr; + unsigned i; + unsigned j; + + + /* Determine how many unique input attribute formats there are. At the + * same time, store the index of the lowest numbered attribute that has + * the same format as any non-unique format. + */ + unique_attr_formats = 1; + function_index[0] = 0; + for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; + + for (j = 0; j < i; j++) { + if (curr_fmt == draw->vertex_element[j].src_format) { + break; + } + } + + if (j == i) { + unique_attr_formats++; + } + + function_index[i] = j; + } + + + /* Each fetch function can be a maximum of 34 instructions (note: this is + * actually a slight over-estimate). That means (34 * 4) = 136 bytes + * each maximum. + */ + spe_init_func(p, 136 * unique_attr_formats); + + + /* Registers 0, 1, and 2 are reserved by the ABI. + */ + allocate_register(&m, 0); + allocate_register(&m, 1); + allocate_register(&m, 2); + + + /* Allocate registers for the function's input parameters. + */ + out_ptr = allocate_register(&m, 3); + in_ptr = allocate_register(&m, 4); + shuf_ptr = allocate_register(&m, 5); + + + /* Generate code for the individual attribute fetch functions. + */ + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + unsigned offset; + + if (function_index[i] == i) { + cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr + - (void *) p->store); + + offset = 0; + emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, + draw->vertex_element[i].src_format); + spe_bi(p, 0, 0, 0); + + /* Round up to the next 16-byte boundary. + */ + if ((((unsigned) p->store) & 0x0f) != 0) { + const unsigned align = ((unsigned) p->store) & 0x0f; + p->store = (uint32_t *) (((void *) p->store) + align); + } + } else { + /* Use the same function entry-point as a previously seen attribute + * with the same format. + */ + cell->attrib_fetch_offsets[i] = + cell->attrib_fetch_offsets[function_index[i]]; + } + } + + static first_time = 1; + if (first_time) { + first_time = 0; + const unsigned instructions = p->csr - p->store; + for (i = 0; i < instructions; i++) { + printf("\t.long\t0x%08x\n", p->store[i]); + } + } +} diff --git a/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c b/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index f2432f4ff5..0000000000 --- a/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" - -#include "pipe/cell/ppu/cell_context.h" -#include "ppc/rtasm/spe_asm.h" - -typedef uint64_t register_mask; - -int allocate_available_register(register_mask *m) -{ - unsigned i; - for (i = 0; i < 64; i++) { - const uint64_t mask = (1ULL << i); - - if ((m[0] & mask) != 0) { - m[0] &= ~mask; - return i; - } - } - - return -1; -} - - -int allocate_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) != 0); - - m[0] &= ~(1ULL << reg); - return reg; -} - - -void release_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) == 0); - - m[0] |= (1ULL << reg); -} - - -/** - * Emit a 4x4 matrix transpose operation - * - * \param p Function that the transpose operation is to be appended to - * \param m Live register mask - * \param row0 Register containing row 0 of the source matrix - * \param row1 Register containing row 1 of the source matrix - * \param row2 Register containing row 2 of the source matrix - * \param row3 Register containing row 3 of the source matrix - * \param dest_ptr Register containing the address of the destination matrix - * \param shuf_ptr Register containing the address of the shuffled data - * \param count Number of colums to actually be written to the destination - * - * \note - * This function assumes that the registers named by \c row0, \c row1, - * \c row2, and \c row3 are scratch and can be modified by the generated code. - * Furthermore, these registers will be released, via calls to - * \c release_register, by this function. - * - * \note - * This function requires that four temporary are available on entry. - */ -static void -emit_matrix_transpose(struct spe_function *p, register_mask *m, - unsigned row0, unsigned row1, unsigned row2, - unsigned row3, unsigned dest_ptr, - unsigned shuf_ptr, unsigned count) -{ - int shuf_hi = allocate_available_register(m); - int shuf_lo = allocate_available_register(m); - int t1 = allocate_available_register(m); - int t2 = allocate_available_register(m); - int t3; - int t4; - int col0; - int col1; - int col2; - int col3; - - - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); - spe_shufb(p, t1, row0, row2, shuf_hi); - spe_shufb(p, t2, row0, row2, shuf_lo); - - - /* row0 and row2 are now no longer needed. Re-use those registers as - * temporaries. - */ - t3 = row0; - t4 = row2; - - spe_shufb(p, t3, row1, row3, shuf_hi); - spe_shufb(p, t4, row1, row3, shuf_lo); - - - /* row1 and row3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col0 = row1; - col1 = row3; - - spe_shufb(p, col0, t1, t3, shuf_hi); - if (count > 1) { - spe_shufb(p, col1, t1, t3, shuf_lo); - } - - /* t1 and t3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col2 = t1; - col3 = t3; - - if (count > 2) { - spe_shufb(p, col2, t2, t4, shuf_hi); - } - - if (count > 3) { - spe_shufb(p, col3, t2, t4, shuf_lo); - } - - - /* Store the results. Remember that the stqd instruction is encoded using - * the qword offset (stand-alone assemblers to the byte-offset to - * qword-offset conversion for you), so the byte-offset needs be divided by - * 16. - */ - switch (count) { - case 4: - spe_stqd(p, col3, dest_ptr, 3); - case 3: - spe_stqd(p, col2, dest_ptr, 2); - case 2: - spe_stqd(p, col1, dest_ptr, 1); - case 1: - spe_stqd(p, col0, dest_ptr, 0); - } - - - /* Release all of the temporary registers used. - */ - release_register(m, col0); - release_register(m, col1); - release_register(m, col2); - release_register(m, col3); - release_register(m, shuf_hi); - release_register(m, shuf_lo); - release_register(m, t2); - release_register(m, t4); -} - - -static void -emit_fetch(struct spe_function *p, register_mask *m, - unsigned in_ptr, unsigned *offset, - unsigned out_ptr, unsigned shuf_ptr, - enum pipe_format format) -{ - const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); - const unsigned type = pf_type(format); - const unsigned bytes = pf_size_x(format); - - int v0 = allocate_available_register(m); - int v1 = allocate_available_register(m); - int v2 = allocate_available_register(m); - int v3 = allocate_available_register(m); - int tmp = allocate_available_register(m); - int float_zero = -1; - int float_one = -1; - float scale_signed = 0.0; - float scale_unsigned = 0.0; - - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); - offset[0] += 4; - - switch (bytes) { - case 1: - scale_signed = 1.0f / 127.0f; - scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 2: - scale_signed = 1.0f / 32767.0f; - scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 4: - scale_signed = 1.0f / 2147483647.0f; - scale_unsigned = 1.0f / 4294967295.0f; - break; - default: - assert(0); - break; - } - - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: - break; - case PIPE_FORMAT_TYPE_UNORM: - spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); - spe_cuflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_SNORM: - spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); - spe_csflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_USCALED: - spe_cuflt(p, v0, v0, 0); - break; - case PIPE_FORMAT_TYPE_SSCALED: - spe_csflt(p, v0, v0, 0); - break; - } - - - if (count < 4) { - float_one = allocate_available_register(m); - spe_il(p, float_one, 1); - spe_cuflt(p, float_one, float_one, 0); - - if (count < 3) { - float_zero = allocate_available_register(m); - spe_il(p, float_zero, 0); - } - } - - release_register(m, tmp); - - emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); - - switch (count) { - case 1: - spe_stqd(p, float_zero, out_ptr, 1); - case 2: - spe_stqd(p, float_zero, out_ptr, 2); - case 3: - spe_stqd(p, float_one, out_ptr, 3); - } - - if (float_zero != -1) { - release_register(m, float_zero); - } - - if (float_one != -1) { - release_register(m, float_one); - } -} - - -void cell_update_vertex_fetch(struct draw_context *draw) -{ - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - register_mask m = ~0; - struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_ATTRIB_MAX]; - unsigned unique_attr_formats; - int out_ptr; - int in_ptr; - int shuf_ptr; - unsigned i; - unsigned j; - - - /* Determine how many unique input attribute formats there are. At the - * same time, store the index of the lowest numbered attribute that has - * the same format as any non-unique format. - */ - unique_attr_formats = 1; - function_index[0] = 0; - for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; - - for (j = 0; j < i; j++) { - if (curr_fmt == draw->vertex_element[j].src_format) { - break; - } - } - - if (j == i) { - unique_attr_formats++; - } - - function_index[i] = j; - } - - - /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). That means (34 * 4) = 136 bytes - * each maximum. - */ - spe_init_func(p, 136 * unique_attr_formats); - - - /* Registers 0, 1, and 2 are reserved by the ABI. - */ - allocate_register(&m, 0); - allocate_register(&m, 1); - allocate_register(&m, 2); - - - /* Allocate registers for the function's input parameters. - */ - out_ptr = allocate_register(&m, 3); - in_ptr = allocate_register(&m, 4); - shuf_ptr = allocate_register(&m, 5); - - - /* Generate code for the individual attribute fetch functions. - */ - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - unsigned offset; - - if (function_index[i] == i) { - cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr - - (void *) p->store); - - offset = 0; - emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, - draw->vertex_element[i].src_format); - spe_bi(p, 0, 0, 0); - - /* Round up to the next 16-byte boundary. - */ - if ((((unsigned) p->store) & 0x0f) != 0) { - const unsigned align = ((unsigned) p->store) & 0x0f; - p->store = (uint32_t *) (((void *) p->store) + align); - } - } else { - /* Use the same function entry-point as a previously seen attribute - * with the same format. - */ - cell->attrib_fetch_offsets[i] = - cell->attrib_fetch_offsets[function_index[i]]; - } - } - - static first_time = 1; - if (first_time) { - first_time = 0; - const unsigned instructions = p->csr - p->store; - for (i = 0; i < instructions; i++) { - printf("\t.long\t0x%08x\n", p->store[i]); - } - } -} -- cgit v1.2.3 From 3320b1874e810583f95b93a89697b2955987b84f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 15 Feb 2008 11:03:54 -0800 Subject: Cell: Enable code gen for SPE attribute fetch Doubles are still unsupported. --- src/gallium/drivers/cell/common.h | 15 +- src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_context.h | 4 + src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 15 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 21 +- src/gallium/drivers/cell/spu/spu_main.c | 22 +- src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 477 +--------------------- src/gallium/drivers/cell/spu/spu_vertex_shader.h | 6 +- 8 files changed, 71 insertions(+), 490 deletions(-) (limited to 'src/gallium/drivers/cell') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 4de514c358..74b131fbef 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -90,6 +90,7 @@ #define CELL_CMD_STATE_VS_ARRAY_INFO 16 #define CELL_CMD_STATE_BLEND 17 #define CELL_CMD_VS_EXECUTE 18 +#define CELL_CMD_STATE_ATTRIB_FETCH 19 #define CELL_NUM_BUFFERS 4 @@ -128,13 +129,19 @@ struct cell_command_clear_surface */ struct cell_array_info { - uint64_t base; /**< Base address of the 0th element. */ - uint attr; /**< Attribute that this state is for. */ - uint pitch; /**< Byte pitch from one entry to the next. */ - uint format; /**< Pipe format of each entry. */ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint size; + uint function_offset; } ALIGN16_ATTRIB; +struct cell_attribute_fetch_code { + uint64_t base; + uint size; +}; + struct cell_shader_info { unsigned num_outputs; diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index a4c3f29e8a..196ab777f5 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -34,6 +34,7 @@ SOURCES = \ cell_surface.c \ cell_texture.c \ cell_vbuf.c \ + cell_vertex_fetch.c \ cell_vertex_shader.c \ cell_winsys.c diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 6196c0c72f..91f8e542a2 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "cell_winsys.h" #include "cell/common.h" +#include "ppc/rtasm/spe_asm.h" struct cell_vbuf_render; @@ -111,6 +112,9 @@ struct cell_context /** [4] to ensure 16-byte alignment for each status word */ uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + + struct spe_function attrib_fetch; + unsigned attrib_fetch_offsets[PIPE_ATTRIB_MAX]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index f2432f4ff5..f10689a959 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -27,10 +27,10 @@ #include "pipe/p_context.h" #include "pipe/p_format.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" -#include "pipe/cell/ppu/cell_context.h" +#include "cell_context.h" #include "ppc/rtasm/spe_asm.h" typedef uint64_t register_mask; @@ -380,13 +380,4 @@ void cell_update_vertex_fetch(struct draw_context *draw) cell->attrib_fetch_offsets[function_index[i]]; } } - - static first_time = 1; - if (first_time) { - first_time = 0; - const unsigned instructions = p->csr - p->store; - for (i = 0; i < instructions; i++) { - printf("\t.long\t0x%08x\n", p->store[i]); - } - } } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 0ba4506505..6a1d3bc20a 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -55,14 +55,32 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) uint64_t *batch; struct cell_array_info *array_info; unsigned i, j; + struct cell_attribute_fetch_code *cf; assert(draw->vs.queue_nr != 0); /* XXX: do this on statechange: */ draw_update_vertex_fetch(draw); + cell_update_vertex_fetch(draw); + + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); + batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; + cf = (struct cell_attribute_fetch_code *) (&batch[1]); + cf->base = cell->attrib_fetch.store; + cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr + - (void *) cell->attrib_fetch.store)); + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format format = draw->vertex_element[i].src_format; + const unsigned count = ((pf_size_x(format) != 0) + + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + + (pf_size_w(format) != 0)); + const unsigned size = pf_size_x(format) * count; + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; @@ -72,7 +90,8 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; array_info->attr = i; array_info->pitch = draw->vertex_fetch.pitch[i]; - array_info->format = draw->vertex_element[i].src_format; + array_info->size = size; + array_info->function_offset = cell->attrib_fetch_offsets[i]; } batch = cell_batch_alloc(cell, sizeof(batch[0]) diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1e7243b863..fcbf0f841e 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -54,6 +54,9 @@ struct spu_global spu; struct spu_vs_context draw; +static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -306,7 +309,8 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) ASSERT(attr < PIPE_ATTRIB_MAX); draw.vertex_fetch.src_ptr[attr] = vs_info->base; draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; draw.vertex_fetch.dirty = 1; } @@ -433,6 +437,22 @@ cmd_batch(uint opcode) cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; + case CELL_CMD_STATE_ATTRIB_FETCH: { + struct cell_attribute_fetch_code *code = + (struct cell_attribute_fetch_code *) &buffer[pos+1]; + + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; + pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); + break; + } default: printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 45e3c26c00..55c6c28717 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * (C) Copyright IBM Corporation 2008 * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,10 +29,10 @@ /* * Authors: * Keith Whitwell + * Ian Romanick */ #include -#include #include "pipe/p_util.h" #include "pipe/p_state.h" @@ -59,6 +60,10 @@ #define DRAW_DBG 0 +typedef void (*spu_fetch_func)(qword *out, const qword *in, + const qword *shuffle_data); + + static const qword fetch_shuffle_data[] = { /* Shuffle used by CVT_64_FLOAT */ @@ -97,22 +102,6 @@ static const qword fetch_shuffle_data[] = { }; -static INLINE void -trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, - const qword *shuffle) -{ - qword t1 = si_shufb(row0, row2, shuffle[3]); - qword t2 = si_shufb(row0, row2, shuffle[4]); - qword t3 = si_shufb(row1, row3, shuffle[3]); - qword t4 = si_shufb(row1, row3, shuffle[4]); - - out[0] = si_shufb(t1, t3, shuffle[3]); - out[1] = si_shufb(t1, t3, shuffle[4]); - out[2] = si_shufb(t2, t4, shuffle[3]); - out[3] = si_shufb(t2, t4, shuffle[4]); -} - - /** * Fetch between 1 and 32 bytes from an unaligned address */ @@ -151,446 +140,6 @@ fetch_unaligned(qword *dst, unsigned ea, unsigned size) } -#define CVT_32_FLOAT(q, s) (*(q)) - -static INLINE qword -CVT_64_FLOAT(const qword *qw, const qword *shuffle) -{ - qword a = si_frds(qw[0]); - qword b = si_frds(si_rotqbyi(qw[0], 8)); - qword c = si_frds(qw[1]); - qword d = si_frds(si_rotqbyi(qw[1], 8)); - - qword ab = si_shufb(a, b, shuffle[0]); - qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); - - return si_or(ab, cd); -} - - -static INLINE qword -CVT_8_USCALED(const qword *qw, const qword *shuffle) -{ - return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); -} - - -static INLINE qword -CVT_16_USCALED(const qword *qw, const qword *shuffle) -{ - return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); -} - - -static INLINE qword -CVT_32_USCALED(const qword *qw, const qword *shuffle) -{ - (void) shuffle; - return si_cuflt(*qw, 0); -} - -static INLINE qword -CVT_8_SSCALED(const qword *qw, const qword *shuffle) -{ - return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); -} - - -static INLINE qword -CVT_16_SSCALED(const qword *qw, const qword *shuffle) -{ - return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); -} - - -static INLINE qword -CVT_32_SSCALED(const qword *qw, const qword *shuffle) -{ - (void) shuffle; - return si_csflt(*qw, 0); -} - - -static INLINE qword -CVT_8_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 255.0f); - return si_fm(CVT_8_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_16_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 65535.0f); - return si_fm(CVT_16_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_32_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); - return si_fm(CVT_32_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_8_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 127.0f); - return si_fm(CVT_8_SSCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_16_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 32767.0f); - return si_fm(CVT_16_SSCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_32_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); - return si_fm(CVT_32_SSCALED(qw, shuffle), scale); -} - -#define SZ_4 si_il(0U) -#define SZ_3 si_fsmbi(0x000f) -#define SZ_2 si_fsmbi(0x00ff) -#define SZ_1 si_fsmbi(0x0fff) - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ -static void \ -fetch_##NAME(qword *out, const qword *in, qword defaults, \ - const qword *shuffle) \ -{ \ - qword tmp[4]; \ - \ - tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ - tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ - tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ - tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ - trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ -} - - -FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 ) - -FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 ) - - - -static spu_fetch_func get_fetch_func( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - assert(0); - return NULL; - } -} - - -static unsigned get_vertex_size( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return 8; - case PIPE_FORMAT_R64G64_FLOAT: - return 2 * 8; - case PIPE_FORMAT_R64G64B64_FLOAT: - return 3 * 8; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return 4 * 8; - - case PIPE_FORMAT_R32_SSCALED: - case PIPE_FORMAT_R32_SNORM: - case PIPE_FORMAT_R32_USCALED: - case PIPE_FORMAT_R32_UNORM: - case PIPE_FORMAT_R32_FLOAT: - return 4; - case PIPE_FORMAT_R32G32_SSCALED: - case PIPE_FORMAT_R32G32_SNORM: - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - return 2 * 4; - case PIPE_FORMAT_R32G32B32_SSCALED: - case PIPE_FORMAT_R32G32B32_SNORM: - case PIPE_FORMAT_R32G32B32_USCALED: - case PIPE_FORMAT_R32G32B32_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - return 3 * 4; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_USCALED: - case PIPE_FORMAT_R32G32B32A32_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return 4 * 4; - - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16_USCALED: - return 2; - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R16G16_UNORM: - return 2 * 2; - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16_UNORM: - return 3 * 2; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - return 4 * 2; - - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8_UNORM: - return 1; - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8_USCALED: - case PIPE_FORMAT_R8G8_UNORM: - return 2 * 1; - case PIPE_FORMAT_R8G8B8_SSCALED: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8_USCALED: - case PIPE_FORMAT_R8G8B8_UNORM: - return 3 * 1; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SSCALED: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_USCALED: - case PIPE_FORMAT_R8G8B8A8_UNORM: - return 4 * 1; - - case 0: - return 0; /* not sure why this is needed */ - - default: - assert(0); - return 0; - } -} - - /** * Fetch vertex attributes for 'count' vertices. */ @@ -612,10 +161,10 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* loop over vertex attributes (vertex shader inputs) */ for (attr = 0; attr < nr_attrs; attr++) { - const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; const unsigned pitch = draw->vertex_fetch.pitch[attr]; const uint64_t src = draw->vertex_fetch.src_ptr[attr]; - const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + const spu_fetch_func fetch = (spu_fetch_func) + (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); unsigned i; unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; @@ -644,8 +193,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* Convert all 4 vertices to vectors of float. */ - (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, - fetch_shuffle_data); + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); } } @@ -662,12 +210,5 @@ void spu_update_vertex_fetch( struct spu_vs_context *draw ) } - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - draw->vertex_fetch.fetch[i] = - get_fetch_func(draw->vertex_fetch.format[i]); - draw->vertex_fetch.size[i] = - get_vertex_size(draw->vertex_fetch.format[i]); - } - draw->vertex_fetch.fetch_func = generic_vertex_fetch; } diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index b5bf31e67d..0fb0bc28d0 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -6,8 +6,6 @@ struct spu_vs_context; -typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, - const qword *shuffle_data); typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, struct spu_exec_machine *machine, const unsigned *elts, @@ -20,12 +18,12 @@ struct spu_vs_context { uint64_t src_ptr[PIPE_ATTRIB_MAX]; unsigned pitch[PIPE_ATTRIB_MAX]; unsigned size[PIPE_ATTRIB_MAX]; - enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned code_offset[PIPE_ATTRIB_MAX]; unsigned nr_attrs; boolean dirty; - spu_fetch_func fetch[PIPE_ATTRIB_MAX]; spu_full_fetch_func fetch_func; + void *code; } vertex_fetch; /* Clip derived state: -- cgit v1.2.3