From b642730be93149baa7556e5791393168ab396175 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:35:24 +0900 Subject: Code reorganization: move files into their places. This is in a separate commit to ensure renames are properly preserved. --- src/gallium/drivers/cell/spu/spu_render.c | 301 ++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 src/gallium/drivers/cell/spu/spu_render.c (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 0000000000..932fb500b3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,301 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include +#include +#include + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "pipe/cell/common.h" + + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. %u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. + */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + + + if (Debug) { + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " + "inline_vert=%u\n", + spu.init.id, + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + /* + printf(" bound: %g, %g .. %g, %g\n", + render->xmin, render->ymin, render->xmax, render->ymax); + */ + } + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... */ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + if (Debug) + printf("SPU %u: RENDER done\n", + spu.init.id); +} + + -- cgit v1.2.3 From 6acd63a4980951727939c0dd545a0324965b3834 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 17:50:12 +0900 Subject: Code reorganization: update build. Update the Makefiles and includes for the new paths. Note that there hasn't been no separation of the Makefiles yet, and make is jumping all over the place. That will be taken care shortly. But for now, make should work. It was tested with linux and linux-dri. Linux-cell and linux-llvm might require some minor tweaks. --- configs/beos | 2 +- configs/darwin | 2 +- configs/darwin-x86ppc | 2 +- configs/default | 2 +- configs/freebsd-dri | 2 +- configs/linux-cell | 2 +- configs/linux-directfb | 2 +- configs/linux-dri | 6 +- configs/linux-dri-xcb | 4 +- configs/linux-fbdev | 2 +- configs/linux-osmesa | 2 +- configs/linux-osmesa16 | 2 +- configs/linux-osmesa16-static | 2 +- configs/linux-osmesa32 | 2 +- configs/linux-solo | 2 +- src/gallium/Makefile | 12 +-- src/gallium/Makefile.template | 7 +- src/gallium/aux/Makefile | 24 +++++ src/gallium/aux/draw/draw_private.h | 2 +- src/gallium/aux/draw/draw_vertex.c | 4 +- src/gallium/aux/draw/draw_vertex_shader.c | 4 +- src/gallium/aux/llvm/Makefile | 4 +- src/gallium/aux/llvm/gallivm.cpp | 4 +- src/gallium/aux/llvm/gallivm_cpu.cpp | 4 +- src/gallium/aux/llvm/tgsitollvm.cpp | 10 +- src/gallium/aux/pipebuffer/Makefile | 2 +- src/gallium/aux/tgsi/exec/tgsi_exec.c | 4 +- src/gallium/aux/tgsi/exec/tgsi_sse2.c | 4 +- src/gallium/aux/tgsi/util/tgsi_transform.h | 4 +- src/gallium/drivers/Makefile | 24 +++++ src/gallium/drivers/cell/ppu/Makefile | 7 +- src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 6 +- src/gallium/drivers/cell/ppu/cell_context.h | 6 +- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 2 +- src/gallium/drivers/cell/ppu/cell_flush.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.c | 2 +- src/gallium/drivers/cell/ppu/cell_spu.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_blend.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_clip.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 4 +- src/gallium/drivers/cell/ppu/cell_state_fs.c | 8 +- .../drivers/cell/ppu/cell_state_rasterizer.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_sampler.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_vbuf.c | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 6 +- src/gallium/drivers/cell/spu/Makefile | 6 +- src/gallium/drivers/cell/spu/spu_exec.c | 4 +- src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 4 +- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_render.h | 2 +- src/gallium/drivers/cell/spu/spu_tile.h | 2 +- src/gallium/drivers/cell/spu/spu_util.c | 4 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 6 +- src/gallium/drivers/failover/Makefile | 2 +- src/gallium/drivers/i915simple/Makefile | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.h | 2 +- .../drivers/i915simple/i915_fpc_translate.c | 4 +- src/gallium/drivers/i915simple/i915_prim_emit.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 2 +- .../drivers/i915simple/i915_state_derived.c | 4 +- src/gallium/drivers/i915simple/i915_strings.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i965simple/Makefile | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_strings.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 2 +- src/gallium/drivers/softpipe/Makefile | 2 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_context.h | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 2 +- src/gallium/drivers/softpipe/sp_flush.c | 2 +- src/gallium/drivers/softpipe/sp_headers.h | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 4 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 6 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_state_clip.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 6 +- src/gallium/drivers/softpipe/sp_state_fs.c | 8 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 4 +- src/gallium/drivers/softpipe/sp_state_vertex.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/winsys/dri/Makefile | 38 +++++++ src/gallium/winsys/dri/Makefile.template | 113 +++++++++++++++++++++ src/gallium/winsys/dri/intel/Makefile | 8 +- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 2 +- .../winsys/dri/intel/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 6 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/Makefile | 16 ++- src/mesa/drivers/x11/xm_api.c | 2 +- src/mesa/drivers/x11/xm_dd.c | 2 +- src/mesa/drivers/x11/xm_surface.c | 8 +- src/mesa/drivers/x11/xm_winsys.c | 2 +- src/mesa/drivers/x11/xmesaP.h | 4 +- src/mesa/sources | 83 +++++++-------- src/mesa/state_tracker/st_atom_shader.c | 2 +- src/mesa/state_tracker/st_cache.c | 4 +- src/mesa/state_tracker/st_cache.h | 2 +- src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 6 +- src/mesa/state_tracker/st_cb_program.c | 4 +- src/mesa/state_tracker/st_cb_rasterpos.c | 4 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_context.c | 4 +- src/mesa/state_tracker/st_debug.c | 4 +- src/mesa/state_tracker/st_draw.c | 4 +- src/mesa/state_tracker/st_gen_mipmap.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +- src/mesa/state_tracker/st_program.c | 4 +- 127 files changed, 445 insertions(+), 241 deletions(-) create mode 100644 src/gallium/aux/Makefile create mode 100644 src/gallium/drivers/Makefile create mode 100644 src/gallium/winsys/dri/Makefile create mode 100644 src/gallium/winsys/dri/Makefile.template (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/configs/beos b/configs/beos index f07973d0c7..2b74af739d 100644 --- a/configs/beos +++ b/configs/beos @@ -86,7 +86,7 @@ else endif # Directories -SRC_DIRS = mesa glu glut/beos +SRC_DIRS = gallium mesa glu glut/beos GLU_DIRS = sgi DRIVER_DIRS = beos PROGRAM_DIRS = beos samples redbook demos tests diff --git a/configs/darwin b/configs/darwin index 7826ecc605..bba7802696 100644 --- a/configs/darwin +++ b/configs/darwin @@ -25,5 +25,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/darwin-x86ppc b/configs/darwin-x86ppc index 13172327a7..ebeb25051f 100644 --- a/configs/darwin-x86ppc +++ b/configs/darwin-x86ppc @@ -29,5 +29,5 @@ GLW_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXt $(TOP)/lib/GL.dylib APP_LIB_DEPS = -L$(TOP)/lib -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm # omit glw lib for now: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx diff --git a/configs/default b/configs/default index 166205a1d3..25a87e66a1 100644 --- a/configs/default +++ b/configs/default @@ -60,7 +60,7 @@ GLW_SOURCES = GLwDrawA.c # Directories to build LIB_DIR = lib -SRC_DIRS = mesa glu glut/glx glw +SRC_DIRS = gallium mesa glu glut/glx glw GLU_DIRS = sgi DRIVER_DIRS = x11 osmesa # Which subdirs under $(TOP)/progs/ to enter: diff --git a/configs/freebsd-dri b/configs/freebsd-dri index 402883d1de..67d253b869 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -36,7 +36,7 @@ GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/X11R6/lib -lGL -lXt -lX11 # Directories -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw DRIVER_DIRS = dri PROGRAM_DIRS = WINDOW_SYSTEM=dri diff --git a/configs/linux-cell b/configs/linux-cell index 3d874491e4..fdf20deeeb 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -21,7 +21,7 @@ CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$( CXXFLAGS = $(CFLAGS) # Omitting glw here: -SRC_DIRS = mesa glu glut/glx +SRC_DIRS = gallium mesa glu glut/glx MKDEP_OPTIONS = -fdepend -Y diff --git a/configs/linux-directfb b/configs/linux-directfb index 09332f4808..dff27f7850 100644 --- a/configs/linux-directfb +++ b/configs/linux-directfb @@ -22,7 +22,7 @@ ifeq ($(HAVE_X86), yes) endif # Directories -SRC_DIRS = mesa glu glut/directfb +SRC_DIRS = gallium mesa glu glut/directfb GLU_DIRS = sgi DRIVER_DIRS = directfb PROGRAM_DIRS = demos directfb diff --git a/configs/linux-dri b/configs/linux-dri index 936fce9982..e6135856fc 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -54,10 +54,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif @@ -66,4 +66,4 @@ WINDOW_SYSTEM=dri # gamma are missing because they have not been converted to use the new # interface. -DRI_DIRS = intel_winsys +DRI_DIRS = intel diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index aa292a13ec..ea4bdf1864 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -53,10 +53,10 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 mesa glu glut/glx glw +SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = egl else -SRC_DIRS = glx/x11 mesa glu glut/glx glw +SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw PROGRAM_DIRS = endif diff --git a/configs/linux-fbdev b/configs/linux-fbdev index e36d20a702..1ddccb3f52 100644 --- a/configs/linux-fbdev +++ b/configs/linux-fbdev @@ -6,7 +6,7 @@ CONFIG_NAME = linux-fbdev CFLAGS = -O3 -ffast-math -ansi -pedantic -fPIC -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DPTHREADS -DUSE_GLFBDEV_DRIVER -SRC_DIRS = mesa glu glut/fbdev +SRC_DIRS = gallium mesa glu glut/fbdev DRIVER_DIRS = fbdev osmesa PROGRAM_DIRS = fbdev demos redbook samples diff --git a/configs/linux-osmesa b/configs/linux-osmesa index cc1fbbd109..0382a19553 100644 --- a/configs/linux-osmesa +++ b/configs/linux-osmesa @@ -14,7 +14,7 @@ CXXFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOUR # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = osdemos diff --git a/configs/linux-osmesa16 b/configs/linux-osmesa16 index 1fb0186d31..9a527592f1 100644 --- a/configs/linux-osmesa16 +++ b/configs/linux-osmesa16 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa16.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa16-static b/configs/linux-osmesa16-static index 6645504478..1e6380b02e 100644 --- a/configs/linux-osmesa16-static +++ b/configs/linux-osmesa16-static @@ -18,7 +18,7 @@ OSMESA_LIB_NAME = libOSMesa16.a # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-osmesa32 b/configs/linux-osmesa32 index a1e5a358d6..f0ef1831b0 100644 --- a/configs/linux-osmesa32 +++ b/configs/linux-osmesa32 @@ -17,7 +17,7 @@ OSMESA_LIB_NAME = libOSMesa32.so # Directories -SRC_DIRS = mesa glu +SRC_DIRS = gallium mesa glu DRIVER_DIRS = osmesa PROGRAM_DIRS = diff --git a/configs/linux-solo b/configs/linux-solo index 220fe58b9a..d49b972228 100644 --- a/configs/linux-solo +++ b/configs/linux-solo @@ -43,7 +43,7 @@ GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -lm APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm -lpthread # Directories -SRC_DIRS = glx/mini mesa glu glut/mini +SRC_DIRS = glx/mini gallium mesa glu glut/mini DRIVER_DIRS = dri PROGRAM_DIRS = miniglx diff --git a/src/gallium/Makefile b/src/gallium/Makefile index d880d090c1..a13b9a52d3 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -1,16 +1,8 @@ -TOP = ../../.. +TOP = ../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) $(LLVM_DIR) +SUBDIRS = aux drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 8e84f8eb2d..0717ed8dd2 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -15,7 +15,10 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/include/pipe \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ $(DRIVER_INCLUDES) @@ -38,7 +41,7 @@ INCLUDES = \ default: depend symlinks $(LIBNAME) -$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/mesa/pipe/Makefile.template +$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) diff --git a/src/gallium/aux/Makefile b/src/gallium/aux/Makefile new file mode 100644 index 0000000000..da68498aa1 --- /dev/null +++ b/src/gallium/aux/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_DIR = llvm +endif + +SUBDIRS = pipebuffer $(LLVM_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/aux/draw/draw_private.h b/src/gallium/aux/draw/draw_private.h index b17eaaed65..3d09aef87c 100644 --- a/src/gallium/aux/draw/draw_private.h +++ b/src/gallium/aux/draw/draw_private.h @@ -45,7 +45,7 @@ #include "pipe/p_defines.h" #include "x86/rtasm/x86sse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" struct gallivm_prog; diff --git a/src/gallium/aux/draw/draw_vertex.c b/src/gallium/aux/draw/draw_vertex.c index 2d6592150f..daf1ef4b80 100644 --- a/src/gallium/aux/draw/draw_vertex.c +++ b/src/gallium/aux/draw/draw_vertex.c @@ -34,8 +34,8 @@ */ -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/aux/draw/draw_vertex_shader.c b/src/gallium/aux/draw/draw_vertex_shader.c index c824c1407e..377ecbb931 100644 --- a/src/gallium/aux/draw/draw_vertex_shader.c +++ b/src/gallium/aux/draw/draw_vertex_shader.c @@ -34,13 +34,13 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #if defined(__i386__) || defined(__386__) -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "draw_private.h" #include "draw_context.h" #include "x86/rtasm/x86sse.h" -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #define DBG_VS 0 diff --git a/src/gallium/aux/llvm/Makefile b/src/gallium/aux/llvm/Makefile index 9c6e16d86b..e6ac399d08 100644 --- a/src/gallium/aux/llvm/Makefile +++ b/src/gallium/aux/llvm/Makefile @@ -30,7 +30,9 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/mesa/pipe \ + -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/aux/llvm/gallivm.cpp b/src/gallium/aux/llvm/gallivm.cpp index da0105c2c9..d14bb3b99a 100644 --- a/src/gallium/aux/llvm/gallivm.cpp +++ b/src/gallium/aux/llvm/gallivm.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include #include diff --git a/src/gallium/aux/llvm/gallivm_cpu.cpp b/src/gallium/aux/llvm/gallivm_cpu.cpp index dc4d92a72a..8f9830d0b1 100644 --- a/src/gallium/aux/llvm/gallivm_cpu.cpp +++ b/src/gallium/aux/llvm/gallivm_cpu.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" #include #include diff --git a/src/gallium/aux/llvm/tgsitollvm.cpp b/src/gallium/aux/llvm/tgsitollvm.cpp index 0de595e678..2cb4acce32 100644 --- a/src/gallium/aux/llvm/tgsitollvm.cpp +++ b/src/gallium/aux/llvm/tgsitollvm.cpp @@ -10,11 +10,11 @@ #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" #include diff --git a/src/gallium/aux/pipebuffer/Makefile b/src/gallium/aux/pipebuffer/Makefile index 75764a9a18..588629e870 100644 --- a/src/gallium/aux/pipebuffer/Makefile +++ b/src/gallium/aux/pipebuffer/Makefile @@ -17,7 +17,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/aux/tgsi/exec/tgsi_exec.c b/src/gallium/aux/tgsi/exec/tgsi_exec.c index 37e6007068..a8f64c2287 100644 --- a/src/gallium/aux/tgsi/exec/tgsi_exec.c +++ b/src/gallium/aux/tgsi/exec/tgsi_exec.c @@ -54,8 +54,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #define TILE_TOP_LEFT 0 diff --git a/src/gallium/aux/tgsi/exec/tgsi_sse2.c b/src/gallium/aux/tgsi/exec/tgsi_sse2.c index 1e56e4afb6..593464db3e 100755 --- a/src/gallium/aux/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/aux/tgsi/exec/tgsi_sse2.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "tgsi_exec.h" #include "tgsi_sse2.h" diff --git a/src/gallium/aux/tgsi/util/tgsi_transform.h b/src/gallium/aux/tgsi/util/tgsi_transform.h index 365d8c298c..fcf85d603b 100644 --- a/src/gallium/aux/tgsi/util/tgsi_transform.h +++ b/src/gallium/aux/tgsi/util/tgsi_transform.h @@ -31,8 +31,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile new file mode 100644 index 0000000000..c0345a9cb5 --- /dev/null +++ b/src/gallium/drivers/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-cell) +CELL_DIR = cell +endif + +SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 50060f5cd3..011863c11e 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -40,8 +40,11 @@ SOURCES = \ OBJECTS = $(SOURCES:.c=.o) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa - +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 07b908eec5..e588a30d5b 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,7 +35,7 @@ #include #include "pipe/p_inlines.h" #include "pipe/p_util.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_batch.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index bbe1fd7a11..e1eb22f468 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -37,9 +37,9 @@ #include "pipe/p_format.h" #include "pipe/p_util.h" #include "pipe/p_winsys.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 3b63419b5e..6196c0c72f 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -32,10 +32,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" #include "cell_winsys.h" -#include "pipe/cell/common.h" +#include "cell/common.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 717cd8370f..f12613649b 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -39,7 +39,7 @@ #include "cell_draw_arrays.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index f62bc4650c..20f27531fc 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -31,7 +31,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_render.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index 4ab277a4b2..b663b37622 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -34,7 +34,7 @@ #include "cell_render.h" #include "cell_spu.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" struct render_stage { diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 7c83a47e57..419e74dc40 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -31,7 +31,7 @@ #include "cell_spu.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/cell/common.h" +#include "cell/common.h" /* diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 19eff94f96..137f26612e 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -31,7 +31,7 @@ #include #include -#include "pipe/cell/common.h" +#include "cell/common.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_blend.c b/src/gallium/drivers/cell/ppu/cell_state_blend.c index 4fc60548c8..b6d6d71f0c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_blend.c +++ b/src/gallium/drivers/cell/ppu/cell_state_blend.c @@ -29,7 +29,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_clip.c b/src/gallium/drivers/cell/ppu/cell_state_clip.c index 4f43665941..0482f87e88 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_clip.c +++ b/src/gallium/drivers/cell/ppu/cell_state_clip.c @@ -30,7 +30,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void cell_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 56daf5dfde..0c46829258 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "cell_context.h" #include "cell_batch.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_fs.c b/src/gallium/drivers/cell/ppu/cell_state_fs.c index 3f46a87d18..b2ed699a5b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_fs.c +++ b/src/gallium/drivers/cell/ppu/cell_state_fs.c @@ -29,12 +29,12 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #if 0 #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" #endif #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c index d8128ece54..7eca5b5765 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c +++ b/src/gallium/drivers/cell/ppu/cell_state_rasterizer.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_sampler.c b/src/gallium/drivers/cell/ppu/cell_state_sampler.c index ade6cc8338..a33421a4ad 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_sampler.c +++ b/src/gallium/drivers/cell/ppu/cell_state_sampler.c @@ -30,7 +30,7 @@ */ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 0f01e920f9..563831b62d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -32,7 +32,7 @@ #include "cell_context.h" #include "cell_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index fca93e4742..a35db0ef99 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e9fafe492e..cc727ff4ed 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -36,7 +36,7 @@ #include "cell_flush.h" #include "cell_spu.h" #include "cell_vbuf.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" /** Allow vertex data to be inlined after RENDER command */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 80dd500b34..0ba4506505 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -38,9 +38,9 @@ #include "cell_spu.h" #include "cell_batch.h" -#include "pipe/cell/common.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** * Run the vertex shader on all vertices in the vertex queue. diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index f202971d73..7aa947299e 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -31,7 +31,11 @@ SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ -INCLUDE_DIRS = -I$(TOP)/src/mesa +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index e51008b9b3..109540b1f7 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -67,8 +67,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index b4c7661ef6..3e17c490d2 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -29,7 +29,7 @@ #define SPU_EXEC_H #include "pipe/p_compiler.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #if defined __cplusplus extern "C" { diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index e375197fe6..1e7243b863 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -38,7 +38,7 @@ #include "spu_tile.h" //#include "spu_test.h" #include "spu_vertex_shader.h" -#include "pipe/cell/common.h" +#include "cell/common.h" #include "pipe/p_defines.h" diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 1710a17512..5c95d112ac 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -31,8 +31,8 @@ #include -#include "pipe/cell/common.h" -#include "pipe/draw/draw_vertex.h" +#include "cell/common.h" +#include "draw/draw_vertex.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 932fb500b3..20e77aa2e6 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -34,7 +34,7 @@ #include "spu_render.h" #include "spu_tri.h" #include "spu_tile.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h index fbcdc5ec31..493434f087 100644 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -29,7 +29,7 @@ #ifndef SPU_RENDER_H #define SPU_RENDER_H -#include "pipe/cell/common.h" +#include "cell/common.h" extern void cmd_render(const struct cell_command_render *render, uint *pos_incr); diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index e53340a55a..3105b848fd 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -32,7 +32,7 @@ #include #include #include "spu_main.h" -#include "pipe/cell/common.h" +#include "cell/common.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index ac373240c1..ea4274a0a7 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,8 +1,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" //#include "tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_util.h" unsigned tgsi_util_get_src_register_swizzle( diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index c1cbbb6d1e..3f5bf41aa2 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -39,9 +39,9 @@ #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" #include "spu_exec.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cell/common.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" #include "spu_main.h" static INLINE unsigned diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 72d0895c74..14389bd055 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -15,7 +15,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 2f91de3afc..ee22ba86f9 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -32,7 +32,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 497623a700..7f71f8fd4f 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -32,7 +32,7 @@ #include "i915_texture.h" #include "i915_reg.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index b4ea63c3e7..2d876925b2 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #define I915_TEX_UNITS 8 diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 868f0c7e04..6c1524c768 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,9 +33,9 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" /** diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index c4a706c37d..44c4325936 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/draw/draw_private.h" +#include "draw/draw_private.h" #include "pipe/p_util.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e069773fd4..c5bf6174f6 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -38,7 +38,7 @@ */ -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index abd5571b88..294e6fad03 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -29,7 +29,7 @@ */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_winsys.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 653983e4a9..4767584fc6 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -27,8 +27,8 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_strings.c b/src/gallium/drivers/i915simple/i915_strings.c index c713bf7208..301fedea19 100644 --- a/src/gallium/drivers/i915simple/i915_strings.c +++ b/src/gallium/drivers/i915simple/i915_strings.c @@ -70,7 +70,7 @@ static const char *i915_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i915 (chipset: %s)", chipset); + sprintf(buffer, "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index de0cc5fe06..17fd27895a 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -33,7 +33,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 48c00ab50b..1dec1f9749 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -61,6 +61,6 @@ ASM_SOURCES = DRIVER_DEFINES = -I. -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 431b45466a..a762a870fe 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -3,7 +3,7 @@ #include "brw_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 95dfce88e4..f746d1cc57 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -33,7 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965simple/brw_strings.c b/src/gallium/drivers/i965simple/brw_strings.c index 29a41ed1e9..3d9c50961f 100644 --- a/src/gallium/drivers/i965simple/brw_strings.c +++ b/src/gallium/drivers/i965simple/brw_strings.c @@ -59,7 +59,7 @@ static const char *brw_get_name( struct pipe_context *pipe ) break; } - sprintf(buffer, "pipe/i965 (chipset: %s)", chipset); + sprintf(buffer, "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 518845e4b2..376a42b1a6 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -32,7 +32,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" /* diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 98915ba101..05df4860ed 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "brw_vs.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" struct brw_prog_info { unsigned num_temps; diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index b45a333a2e..97418a52e7 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index d95645d108..44f946ea74 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 31438a882e..2304ea4246 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -44,7 +44,7 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index cea6b90104..5e98f190bb 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index aff8c2cc5d..8c79cb3ce4 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_vertex.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 71a303a8b5..2049afda34 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,7 +38,7 @@ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index ced0d5d098..2cbd0d7cab 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_flush.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 0ae31d8796..9cf8222133 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -31,7 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" #define PRIM_POINT 1 #define PRIM_LINE 2 diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 2772048661..d73521ccbe 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -38,8 +38,8 @@ #include "sp_quad.h" #include "sp_state.h" #include "sp_prim_setup.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 7f71fdb6a9..69bea8a8f5 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -39,9 +39,9 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vbuf.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 3316858413..b4c01a7ea8 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -42,7 +42,7 @@ #include "x86/rtasm/x86sse.h" #ifdef MESA_LLVM -#include "pipe/llvm/gallivm.h" +#include "llvm/gallivm.h" #endif #include "sp_context.h" diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 6a8a43aeda..adf9ccf64c 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c index 08c5f06d05..c797c0dd3b 100644 --- a/src/gallium/drivers/softpipe/sp_state_clip.c +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -29,7 +29,7 @@ */ #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void softpipe_set_clip_state( struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 372597869f..9d8fd8b750 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -27,9 +27,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 0b814fc284..1e3cadd43d 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -32,11 +32,11 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/llvm/gallivm.h" -#include "pipe/tgsi/util/tgsi_dump.h" -#include "pipe/tgsi/exec/tgsi_sse2.h" +#include "llvm/gallivm.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/exec/tgsi_sse2.h" void * diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 53755099dd..98e04352db 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "sp_context.h" #include "sp_state.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index ea348c7e95..460adccec4 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -31,14 +31,14 @@ #include "pipe/p_util.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_context.h" #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index 09ff540ccf..f01a10de3b 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -33,7 +33,7 @@ #include "sp_state.h" #include "sp_surface.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_context.h" void diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 8802ced187..653449c4f1 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -29,7 +29,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 325bdb86da..2f82fd6abe 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "pipe/tgsi/exec/tgsi_exec.h" +#include "tgsi/exec/tgsi_exec.h" /* diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1597361b82..dde3fabc81 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -34,7 +34,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_tile_cache.h" diff --git a/src/gallium/winsys/dri/Makefile b/src/gallium/winsys/dri/Makefile new file mode 100644 index 0000000000..f466ce6c3c --- /dev/null +++ b/src/gallium/winsys/dri/Makefile @@ -0,0 +1,38 @@ +# src/mesa/drivers/dri/Makefile + +TOP = ../../../.. + +include $(TOP)/configs/current + + + +default: $(TOP)/$(LIB_DIR) subdirs + + +$(TOP)/$(LIB_DIR): + -mkdir $(TOP)/$(LIB_DIR) + + +subdirs: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +install: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) install) || exit 1 ; \ + fi \ + done + + +clean: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) clean) ; \ + fi \ + done + -rm -f common/*.o diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template new file mode 100644 index 0000000000..b96305c094 --- /dev/null +++ b/src/gallium/winsys/dri/Makefile.template @@ -0,0 +1,113 @@ +# -*-makefile-*- + +MESA_MODULES = $(TOP)/src/mesa/libmesa.a + +COMMON_GALLIUM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/utils.c \ + $(TOP)/src/mesa/drivers/dri/common/vblank.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_util.c \ + $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + $(TOP)/src/mesa/drivers/common/driverfuncs.c \ + $(TOP)/src/mesa/drivers/dri/common/texmem.c \ + $(TOP)/src/mesa/drivers/dri/common/drirenderbuffer.c + +COMMON_BM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/dri_bufmgr.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_drmpool.c + + +ifeq ($(WINDOW_SYSTEM),dri) +WINOBJ= +WINLIB= +INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) + +else +# miniglx +WINOBJ= +WINLIB=-L$(MESA)/src/glx/mini +MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini +INCLUDES = $(MINIGLX_INCLUDES) \ + $(SHARED_INCLUDES) \ + $(PCIACCESS_CFLAGS) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(MINIGLX_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) +endif + + +### Include directories +SHARED_INCLUDES = \ + -I. \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -Iserver \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/math \ + -I$(TOP)/src/mesa/transform \ + -I$(TOP)/src/mesa/shader \ + -I$(TOP)/src/mesa/swrast \ + -I$(TOP)/src/mesa/swrast_setup \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/egl/drivers/dri \ + $(LIBDRM_CFLAGS) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) + + +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + + +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) + + +depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +install: $(LIBNAME) + $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + + +include depend diff --git a/src/gallium/winsys/dri/intel/Makefile b/src/gallium/winsys/dri/intel/Makefile index 9ae0f01325..40654bb2ac 100644 --- a/src/gallium/winsys/dri/intel/Makefile +++ b/src/gallium/winsys/dri/intel/Makefile @@ -7,8 +7,8 @@ LIBNAME = i915tex_dri.so MINIGLX_SOURCES = server/intel_dri.c PIPE_DRIVERS = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i915simple/libi915simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a DRIVER_SOURCES = \ intel_winsys_pipe.c \ @@ -28,11 +28,11 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ +DRIVER_DEFINES = -I$(TOP)/src/mesa/drivers/dri/intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") include ../Makefile.template -intel_tex_layout.o: ../intel/intel_tex_layout.c +intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c symlinks: diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 1ba6a9e1b2..0ed3890e93 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -39,7 +39,7 @@ #include "intel_winsys.h" #include "pipe/p_util.h" -#include "pipe/i915simple/i915_winsys.h" +#include "i915simple/i915_winsys.h" struct intel_i915_winsys { diff --git a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c index cec3437831..9e483bdc9f 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_softpipe.c @@ -34,7 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_format.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" struct intel_softpipe_winsys { diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index c3cd22eea3..8da596d419 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -41,11 +41,11 @@ #include "pipe/p_context.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL -#include "pipe/cell/ppu/cell_context.h" -#include "pipe/cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_winsys.h" #else #define TILE_SIZE 32 /* avoid compilation errors */ #endif diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index bf41570257..dbfd37bda2 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "pipe/i965simple/brw_winsys.h" +#include "i965simple/brw_winsys.h" #include "brw_aub.h" #include "xm_winsys_aub.h" diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 720f1b2e02..561608fedd 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -12,16 +12,16 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) PIPE_LIB = \ - $(TOP)/src/mesa/pipe/softpipe/libsoftpipe.a \ - $(TOP)/src/mesa/pipe/i965simple/libi965simple.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i965simple/libi965simple.a ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/mesa/pipe/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/mesa/pipe/cell/spu/g3d_spu.a +CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a +CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/mesa/pipe/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a endif .SUFFIXES : .cpp @@ -71,7 +71,7 @@ libmesa.a: $(SOLO_OBJECTS) fi linux-solo: depend subdirs libmesa.a - cd drivers/dri ; $(MAKE) + cd $(TOP)/src/gallium/winsys/dri ; $(MAKE) ##################################################################### @@ -165,7 +165,6 @@ depend: $(ALL_SOURCES) subdirs: @ (cd x86 ; $(MAKE)) @ (cd x86-64 ; $(MAKE)) - (cd pipe ; $(MAKE)) install: default $(INSTALL) -d $(INSTALL_DIR)/include/GL @@ -178,7 +177,7 @@ install: default $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \ fi @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - cd drivers/dri ; $(MAKE) install ; \ + cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \ fi ## NOT INSTALLED YET: @@ -198,7 +197,6 @@ clean: (cd drivers/dri && $(MAKE) clean) (cd x86 && $(MAKE) clean) (cd x86-64 && $(MAKE) clean) - (cd pipe ; $(MAKE) clean ) include depend diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 08c98eab48..18b033666f 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -85,7 +85,7 @@ #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "pipe/p_defines.h" /** diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 8ae243ae66..34287effe1 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -53,7 +53,7 @@ #include "tnl/tnl.h" #include "tnl/t_context.h" -#include "pipe/softpipe/sp_context.h" +#include "softpipe/sp_context.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" #include "state_tracker/st_draw.h" diff --git a/src/mesa/drivers/x11/xm_surface.c b/src/mesa/drivers/x11/xm_surface.c index 5533158ece..81616b92d9 100644 --- a/src/mesa/drivers/x11/xm_surface.c +++ b/src/mesa/drivers/x11/xm_surface.c @@ -45,10 +45,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_clear.h" -#include "pipe/softpipe/sp_tile_cache.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_clear.h" +#include "softpipe/sp_tile_cache.h" +#include "softpipe/sp_surface.h" #include "state_tracker/st_context.h" diff --git a/src/mesa/drivers/x11/xm_winsys.c b/src/mesa/drivers/x11/xm_winsys.c index a690df2772..2edc697693 100644 --- a/src/mesa/drivers/x11/xm_winsys.c +++ b/src/mesa/drivers/x11/xm_winsys.c @@ -38,7 +38,7 @@ #include "main/macros.h" #include "pipe/p_winsys.h" -#include "pipe/softpipe/sp_winsys.h" +#include "softpipe/sp_winsys.h" /** diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 4709d63394..fd2dfcd79a 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -37,8 +37,8 @@ #include "xm_image.h" #endif #include "state_tracker/st_cb_fbo.h" -#include "pipe/softpipe/sp_context.h" -#include "pipe/softpipe/sp_surface.h" +#include "softpipe/sp_context.h" +#include "softpipe/sp_surface.h" extern _glthread_Mutex _xmesa_lock; diff --git a/src/mesa/sources b/src/mesa/sources index 1165425183..2d07738210 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - pipe/draw/draw_clip.c \ - pipe/draw/draw_context.c\ - pipe/draw/draw_cull.c \ - pipe/draw/draw_debug.c \ - pipe/draw/draw_flatshade.c \ - pipe/draw/draw_offset.c \ - pipe/draw/draw_prim.c \ - pipe/draw/draw_stipple.c \ - pipe/draw/draw_twoside.c \ - pipe/draw/draw_unfilled.c \ - pipe/draw/draw_validate.c \ - pipe/draw/draw_vbuf.c \ - pipe/draw/draw_vertex.c \ - pipe/draw/draw_vertex_cache.c \ - pipe/draw/draw_vertex_fetch.c \ - pipe/draw/draw_vertex_shader.c \ - pipe/draw/draw_vf.c \ - pipe/draw/draw_vf_generic.c \ - pipe/draw/draw_vf_sse.c \ - pipe/draw/draw_wide_prims.c + $(TOP)/src/gallium/aux/draw/draw_clip.c \ + $(TOP)/src/gallium/aux/draw/draw_context.c\ + $(TOP)/src/gallium/aux/draw/draw_cull.c \ + $(TOP)/src/gallium/aux/draw/draw_debug.c \ + $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ + $(TOP)/src/gallium/aux/draw/draw_offset.c \ + $(TOP)/src/gallium/aux/draw/draw_prim.c \ + $(TOP)/src/gallium/aux/draw/draw_stipple.c \ + $(TOP)/src/gallium/aux/draw/draw_twoside.c \ + $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ + $(TOP)/src/gallium/aux/draw/draw_validate.c \ + $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/aux/draw/draw_vf.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/aux/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - pipe/tgsi/exec/tgsi_exec.c \ - pipe/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - pipe/tgsi/util/tgsi_build.c \ - pipe/tgsi/util/tgsi_dump.c \ - pipe/tgsi/util/tgsi_parse.c \ - pipe/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - pipe/cso_cache/cso_hash.c \ - pipe/cso_cache/cso_cache.c + $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/aux/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - pipe/util/p_debug.c \ - pipe/util/p_tile.c \ - pipe/util/p_util.c + $(TOP)/src/gallium/aux/util/p_debug.c \ + $(TOP)/src/gallium/aux/util/p_tile.c \ + $(TOP)/src/gallium/aux/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -331,13 +331,13 @@ __COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c X11_DRIVER_SOURCES = \ - pipe/xlib/glxapi.c \ - pipe/xlib/fakeglx.c \ - pipe/xlib/xfonts.c \ - pipe/xlib/xm_api.c \ - pipe/xlib/xm_winsys.c \ - pipe/xlib/xm_winsys_aub.c \ - pipe/xlib/brw_aub.c + $(TOP)/src/gallium/winsys/xlib/glxapi.c \ + $(TOP)/src/gallium/winsys/xlib/fakeglx.c \ + $(TOP)/src/gallium/winsys/xlib/xfonts.c \ + $(TOP)/src/gallium/winsys/xlib/xm_api.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys.c \ + $(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c \ + $(TOP)/src/gallium/winsys/xlib/brw_aub.c OSMESA_DRIVER_SOURCES = \ drivers/osmesa/osmesa.c @@ -425,7 +425,10 @@ FBDEV_DRIVER_OBJECTS = $(FBDEV_DRIVER_SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/aux OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -435,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/src/mesa/pipe/tgsi + -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 2c6ec8421b..b67b620eaa 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_cache.h" diff --git a/src/mesa/state_tracker/st_cache.c b/src/mesa/state_tracker/st_cache.c index e0965b217a..2979e7fae5 100644 --- a/src/mesa/state_tracker/st_cache.c +++ b/src/mesa/state_tracker/st_cache.c @@ -36,8 +36,8 @@ #include "pipe/p_state.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/cso_cache/cso_hash.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" /* Those function will either find the state of the given template diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h index e0c176b0ff..b81de316ec 100644 --- a/src/mesa/state_tracker/st_cache.h +++ b/src/mesa/state_tracker/st_cache.h @@ -33,7 +33,7 @@ #ifndef ST_CACHE_H #define ST_CACHE_H -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" struct pipe_blend_state; struct pipe_sampler_state; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 3a3bf9016d..663c4f205d 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -43,7 +43,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index f13199a3c0..e2d4e06da1 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -56,7 +56,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 31744151f1..5315294c07 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -53,10 +53,10 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" /** diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index af3ee65504..61d4f4c41c 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -39,8 +39,8 @@ #include "shader/programopt.h" #include "shader/shader_api.h" -#include "pipe/cso_cache/cso_cache.h" -#include "pipe/draw/draw_context.h" +#include "cso_cache/cso_cache.h" +#include "draw/draw_context.h" #include "st_context.h" #include "st_program.h" diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 7e347c4893..5b0eb6022b 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -44,8 +44,8 @@ #include "st_draw.h" #include "st_cb_rasterpos.h" #include "st_draw.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" #include "shader/prog_instruction.h" #include "vbo/vbo.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 868c5f3c5f..c89c74229e 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #include "st_context.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 91a40288cc..03dbb30b0f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -47,7 +47,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/util/p_tile.h" +#include "util/p_tile.h" #define DBG if (0) printf diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index bf4618bed8..09e389f9dc 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -54,8 +54,8 @@ #include "pipe/p_context.h" #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cso_cache/cso_cache.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_cache.h" /** diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 57450e52bf..5888bcb98a 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -31,9 +31,9 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_dump.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_debug.h" diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index ae9f5c8b11..1c0fa8c6aa 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -47,8 +47,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_inlines.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" static GLuint double_types[4] = { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 459941cca8..6c09b86033 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -37,7 +37,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "pipe/cso_cache/cso_cache.h" +#include "cso_cache/cso_cache.h" #include "st_context.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 325aa20173..97206752af 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -32,9 +32,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_util.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index c8297baded..dc992ee9c2 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -38,8 +38,8 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "pipe/draw/draw_context.h" -#include "pipe/tgsi/util/tgsi_dump.h" +#include "draw/draw_context.h" +#include "tgsi/util/tgsi_dump.h" #include "st_context.h" #include "st_cache.h" -- cgit v1.2.3 From 1936e4bdfd776f78f9fe44f77ce66066fd166360 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 17 Mar 2008 15:45:52 -0700 Subject: cell: Initial code-gen for alpha / stencil / depth testing Alpha test is currently broken because all per-fragment testing occurs before alpha is calculated. Stencil test is currently broken because the Z-clear code asserts if there is a stencil buffer. --- src/gallium/drivers/cell/common.h | 10 + src/gallium/drivers/cell/ppu/Makefile | 1 + src/gallium/drivers/cell/ppu/cell_context.h | 25 +- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 37 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 24 +- .../drivers/cell/ppu/cell_state_per_fragment.c | 1020 ++++++++++++++++++++ .../drivers/cell/ppu/cell_state_per_fragment.h | 35 + src/gallium/drivers/cell/spu/Makefile | 1 + src/gallium/drivers/cell/spu/spu_main.c | 25 +- src/gallium/drivers/cell/spu/spu_main.h | 16 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 191 ++++ src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 32 + src/gallium/drivers/cell/spu/spu_render.c | 4 +- src/gallium/drivers/cell/spu/spu_tri.c | 23 +- src/gallium/drivers/cell/spu/spu_ztest.h | 135 --- 15 files changed, 1409 insertions(+), 170 deletions(-) create mode 100644 src/gallium/drivers/cell/ppu/cell_state_per_fragment.c create mode 100644 src/gallium/drivers/cell/ppu/cell_state_per_fragment.h create mode 100644 src/gallium/drivers/cell/spu/spu_per_fragment_op.c create mode 100644 src/gallium/drivers/cell/spu/spu_per_fragment_op.h delete mode 100644 src/gallium/drivers/cell/spu/spu_ztest.h (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 9a4004535e..fe93fd8e1a 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -104,6 +104,16 @@ +/** + */ +struct cell_command_depth_stencil_alpha_test { + uint64_t base; /**< Effective address of code start. */ + unsigned size; /**< Size in bytes of test code. */ + unsigned read_depth; /**< Flag: should depth be read? */ + unsigned read_stencil; /**< Flag: should stencil be read? */ +}; + + /** * Tell SPUs about the framebuffer size, location */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index d38fa6ce07..0389a9554c 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -27,6 +27,7 @@ SOURCES = \ cell_flush.c \ cell_state_derived.c \ cell_state_emit.c \ + cell_state_per_fragment.c \ cell_state_shader.c \ cell_pipe_state.c \ cell_screen.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index b221424323..9e79db0ace 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -57,16 +57,37 @@ struct cell_fragment_shader_state }; +struct cell_blend_state { + struct pipe_blend_state base; + + /** + * Generated code to perform alpha blending + */ + struct spe_function code; +}; + + +struct cell_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state base; + + /** + * Generated code to perform alpha, stencil, and depth testing on the SPE + */ + struct spe_function code; + +}; + + struct cell_context { struct pipe_context pipe; struct cell_winsys *winsys; - const struct pipe_blend_state *blend; + const struct cell_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; uint num_samplers; - const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct cell_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 025ed3bbbf..66ede99d13 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -36,6 +36,7 @@ #include "cell_context.h" #include "cell_state.h" #include "cell_texture.h" +#include "cell_state_per_fragment.h" @@ -43,7 +44,12 @@ static void * cell_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - return mem_dup(blend, sizeof(*blend)); + struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); + + (void) memcpy(cb, blend, sizeof(*blend)); + cb->code.store = NULL; + + return cb; } @@ -54,7 +60,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) draw_flush(cell->draw); - cell->blend = (const struct pipe_blend_state *)blend; + cell->blend = (const struct cell_blend_state *)blend; cell->dirty |= CELL_NEW_BLEND; } @@ -63,7 +69,10 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) static void cell_delete_blend_state(struct pipe_context *pipe, void *blend) { - FREE(blend); + struct cell_blend_state *cb = (struct cell_blend_state *) blend; + + spe_release_func(& cb->code); + FREE(cb); } @@ -87,7 +96,13 @@ static void * cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { - return mem_dup(depth_stencil, sizeof(*depth_stencil)); + struct cell_depth_stencil_alpha_state *cdsa = + MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); + + (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); + cdsa->code.store = NULL; + + return cdsa; } @@ -96,12 +111,16 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth_stencil) { struct cell_context *cell = cell_context(pipe); + struct cell_depth_stencil_alpha_state *cdsa = + (struct cell_depth_stencil_alpha_state *) depth_stencil; draw_flush(cell->draw); - cell->depth_stencil - = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; + if (cdsa->code.store == NULL) { + cell_generate_depth_stencil_test(cdsa); + } + cell->depth_stencil = cdsa; cell->dirty |= CELL_NEW_DEPTH_STENCIL; } @@ -109,7 +128,11 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, static void cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) { - FREE(depth); + struct cell_depth_stencil_alpha_state *cdsa = + (struct cell_depth_stencil_alpha_state *) depth; + + spe_release_func(& cdsa->code); + FREE(cdsa); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 670eb26bdd..c8d5fdf709 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -71,9 +71,27 @@ cell_emit_state(struct cell_context *cell) } if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, - cell->depth_stencil, - sizeof(struct pipe_depth_stencil_alpha_state)); + struct cell_command_depth_stencil_alpha_test dsat; + + + dsat.base = (intptr_t) cell->depth_stencil->code.store; + dsat.size = (char *) cell->depth_stencil->code.csr + - (char *) cell->depth_stencil->code.store; + dsat.read_depth = TRUE; + dsat.read_stencil = FALSE; + + { + uint32_t *p = cell->depth_stencil->code.store; + + printf("\t.text\n"); + for (/* empty */; p < cell->depth_stencil->code.csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } + + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, + sizeof(dsat)); } if (cell->dirty & CELL_NEW_SAMPLER) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c new file mode 100644 index 0000000000..60b64438d8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -0,0 +1,1020 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Generate code to perform all per-fragment operations. + * + * Code generated by these functions perform both alpha, depth, and stencil + * testing as well as alpha blending. + * + * \note + * Occlusion query is not supported, but this is the right place to add that + * support. + * + * \author Ian Romanick + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "cell_context.h" + +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Generate code to perform alpha testing. + * + * The code generated by this function uses the register specificed by + * \c mask as both an input and an output. + * + * \param dsa Current alpha-test state + * \param f Function to which code should be appended + * \param mask Index of register containing active fragment mask + * \param alphas Index of register containing per-fragment alpha values + * + * \note Emits a maximum of 6 instructions. + */ +static void +emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int alphas) +{ + /* If the alpha function is either NEVER or ALWAYS, there is no need to + * load the reference value into a register. ALWAYS is a fairly common + * case, and this optimization saves 2 instructions. + */ + if (dsa->alpha.enabled + && (dsa->alpha.func != PIPE_FUNC_NEVER) + && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + int ref = spe_allocate_available_register(f); + int tmp_a = spe_allocate_available_register(f); + int tmp_b = spe_allocate_available_register(f); + union { + float f; + unsigned u; + } ref_val; + boolean complement = FALSE; + + ref_val.f = dsa->alpha.ref; + + spe_il(f, ref, ref_val.u & 0x0000ffff); + spe_ilh(f, ref, ref_val.u >> 16); + + switch (dsa->alpha.func) { + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_EQUAL: + spe_fceq(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GREATER: + spe_fcgt(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GEQUAL: + spe_fcgt(f, tmp_a, ref, alphas); + spe_fceq(f, tmp_b, ref, alphas); + spe_or(f, tmp_a, tmp_b, tmp_a); + break; + + case PIPE_FUNC_ALWAYS: + case PIPE_FUNC_NEVER: + default: + assert(0); + break; + } + + if (complement) { + spe_andc(f, mask, mask, tmp_a); + } else { + spe_and(f, mask, mask, tmp_a); + } + + spe_release_register(f, ref); + spe_release_register(f, tmp_a); + spe_release_register(f, tmp_b); + } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { + spe_il(f, mask, 0); + } +} + + +/** + * \param dsa Current depth-test state + * \param f Function to which code should be appended + * \param m Mask of allocated / free SPE registers + * \param mask Index of register to contain depth-pass mask + * \param stored Index of register containing values from depth buffer + * \param calculated Index of register containing per-fragment depth values + * + * \return + * If the calculated depth comparison mask is the actual mask, \c FALSE is + * returned. If the calculated depth comparison mask is the compliment of + * the actual mask, \c TRUE is returned. + * + * \note Emits a maximum of 3 instructions. + */ +static boolean +emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int stored, int calculated) +{ + unsigned func = (dsa->depth.enabled) + ? dsa->depth.func : PIPE_FUNC_ALWAYS; + int tmp = spe_allocate_available_register(f); + boolean compliment = FALSE; + + switch (func) { + case PIPE_FUNC_NEVER: + spe_il(f, mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceq(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LEQUAL: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgt(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LESS: + compliment = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgt(f, mask, calculated, stored); + spe_ceq(f, tmp, calculated, stored); + spe_or(f, mask, mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + spe_il(f, mask, ~0); + break; + + default: + assert(0); + break; + } + + spe_release_register(f, tmp); + return compliment; +} + + +/** + * \note Emits a maximum of 5 instructions. + */ +static void +emit_stencil_op(struct spe_function *f, + int out, int in, int mask, unsigned op, unsigned ref) +{ + const int clamp = spe_allocate_available_register(f); + const int tmp = spe_allocate_available_register(f); + + switch(op) { + case PIPE_STENCIL_OP_KEEP: + assert(0); + case PIPE_STENCIL_OP_ZERO: + spe_il(f, out, 0); + break; + case PIPE_STENCIL_OP_REPLACE: + spe_il(f, out, ref); + break; + case PIPE_STENCIL_OP_INCR: + spe_il(f, clamp, 0x0ff); + spe_ai(f, out, in, 1); + spe_cgti(f, tmp, out, clamp); + spe_selb(f, out, out, clamp, tmp); + break; + case PIPE_STENCIL_OP_DECR: + spe_il(f, clamp, 0); + spe_ai(f, out, in, -1); + spe_cgti(f, tmp, out, clamp); + spe_selb(f, out, clamp, out, tmp); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + spe_ai(f, out, in, 1); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + spe_ai(f, out, in, -1); + break; + case PIPE_STENCIL_OP_INVERT: + spe_nor(f, out, in, in); + break; + default: + assert(0); + } + + spe_release_register(f, tmp); + spe_release_register(f, clamp); + + spe_selb(f, out, in, out, mask); +} + + +/** + * \param dsa Depth / stencil test state + * \param face 0 for front face, 1 for back face + * \param f Function to append instructions to + * \param reg_mask Mask of allocated registers + * \param mask Register containing mask of fragments passing the + * alpha test + * \param depth_mask Register containing mask of fragments passing the + * depth test + * \param depth_compliment Is \c depth_mask the compliment of the actual mask? + * \param stencil Register containing values from stencil buffer + * \param depth_pass Register to store mask of fragments passing stencil test + * and depth test + * + * \note + * Emits a maximum of 10 + (3 * 5) = 25 instructions. + */ +static int +emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, + unsigned face, + struct spe_function *f, + int mask, + int depth_mask, + boolean depth_complement, + int stencil, + int depth_pass) +{ + int stencil_fail = spe_allocate_available_register(f); + int depth_fail = spe_allocate_available_register(f); + int stencil_mask = spe_allocate_available_register(f); + int stencil_pass = spe_allocate_available_register(f); + int face_stencil = spe_allocate_available_register(f); + int stencil_src = stencil; + const unsigned ref = (dsa->stencil[face].ref_value + & dsa->stencil[face].value_mask); + boolean complement = FALSE; + int stored = spe_allocate_available_register(f); + int tmp = spe_allocate_available_register(f); + + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].value_mask != 0x0ff)) { + spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + } + + + switch (dsa->stencil[face].func) { + case PIPE_FUNC_NEVER: + spe_il(f, stencil_mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + spe_ceqi(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + spe_clgti(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + spe_clgti(f, stencil_mask, stored, ref); + spe_ceqi(f, tmp, stored, ref); + spe_or(f, stencil_mask, stencil_mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + /* See comment below. */ + break; + + default: + assert(0); + break; + } + + spe_release_register(f, stored); + spe_release_register(f, tmp); + + + /* ALWAYS is a very common stencil-test, so some effort is applied to + * optimize that case. The stencil-pass mask is the same as the input + * fragment mask. This makes the stencil-test (above) a no-op, and the + * input fragment mask can be "renamed" the stencil-pass mask. + */ + if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { + spe_release_register(f, stencil_pass); + stencil_pass = mask; + } else { + if (complement) { + spe_andc(f, stencil_pass, mask, stencil_mask); + } else { + spe_and(f, stencil_pass, mask, stencil_mask); + } + } + + if (depth_complement) { + spe_andc(f, depth_pass, stencil_pass, depth_mask); + } else { + spe_and(f, depth_pass, stencil_pass, depth_mask); + } + + + /* Conditionally emit code to update the stencil value under various + * condititons. Note that there is no need to generate code under the + * following circumstances: + * + * - Stencil write mask is zero. + * - For stencil-fail if the stencil test is ALWAYS + * - For depth-fail if the stencil test is NEVER + * - For depth-pass if the stencil test is NEVER + * - Any of the 3 conditions if the operation is KEEP + */ + if (dsa->stencil[face].write_mask != 0) { + if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { + if (complement) { + spe_and(f, stencil_fail, mask, stencil_mask); + } else { + spe_andc(f, stencil_fail, mask, stencil_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, + dsa->stencil[face].fail_op, + dsa->stencil[face].ref_value); + + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { + if (depth_complement) { + spe_and(f, depth_fail, stencil_pass, depth_mask); + } else { + spe_andc(f, depth_fail, stencil_pass, depth_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, depth_fail, + dsa->stencil[face].zfail_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { + emit_stencil_op(f, face_stencil, stencil_src, depth_pass, + dsa->stencil[face].zpass_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + } + + spe_release_register(f, stencil_fail); + spe_release_register(f, depth_fail); + spe_release_register(f, stencil_mask); + if (stencil_pass != mask) { + spe_release_register(f, stencil_pass); + } + + /* If all of the stencil operations were KEEP or the stencil write mask was + * zero, "stencil_src" will still be set to "stencil". In this case + * release the "face_stencil" register. Otherwise apply the stencil write + * mask to select bits from the calculated stencil value and the previous + * stencil value. + */ + if (stencil_src == stencil) { + spe_release_register(f, face_stencil); + } else if (dsa->stencil[face].write_mask != 0x0ff) { + int tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, dsa->stencil[face].write_mask); + spe_selb(f, stencil_src, stencil, stencil_src, tmp); + + spe_release_register(f, tmp); + } + + return stencil_src; +} + + +void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) +{ + struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; + struct spe_function *const f = &cdsa->code; + + /* This code generates a maximum of 6 (alpha test) + 3 (depth test) + * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round + * up to 64 to make it a happy power-of-two. + */ + spe_init_func(f, 4 * 64); + + + /* Allocate registers for the function's input parameters. Cleverly (and + * clever code is usually dangerous, but I couldn't resist) the generated + * function returns a structure. Returned structures start with register + * 3, and the structure fields are ordered to match up exactly with the + * input parameters. + */ + int mask = spe_allocate_register(f, 3); + int depth = spe_allocate_register(f, 4); + int stencil = spe_allocate_register(f, 5); + int zvals = spe_allocate_register(f, 6); + int frag_a = spe_allocate_register(f, 7); + int facing = spe_allocate_register(f, 8); + + int depth_mask = spe_allocate_available_register(f); + + boolean depth_complement; + + + emit_alpha_test(dsa, f, mask, frag_a); + + depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); + + if (dsa->stencil[0].enabled) { + const int front_depth_pass = spe_allocate_available_register(f); + int front_stencil = emit_stencil_test(dsa, 0, f, mask, + depth_mask, depth_complement, + stencil, front_depth_pass); + + if (dsa->stencil[1].enabled) { + const int back_depth_pass = spe_allocate_available_register(f); + int back_stencil = emit_stencil_test(dsa, 1, f, mask, + depth_mask, depth_complement, + stencil, back_depth_pass); + + /* If the front facing stencil value and the back facing stencil + * value are stored in the same register, there is no need to select + * a value based on the facing. This can happen if the stencil value + * was not modified due to the write masks being zero, the stencil + * operations being KEEP, etc. + */ + if (front_stencil != back_stencil) { + spe_selb(f, stencil, back_stencil, front_stencil, facing); + } + + if (back_stencil != stencil) { + spe_release_register(f, back_stencil); + } + + if (front_stencil != stencil) { + spe_release_register(f, front_stencil); + } + + spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); + + spe_release_register(f, back_depth_pass); + } else { + if (front_stencil != stencil) { + spe_or(f, stencil, front_stencil, front_stencil); + spe_release_register(f, front_stencil); + } + } + + spe_release_register(f, front_depth_pass); + } else if (dsa->depth.enabled) { + if (depth_complement) { + spe_andc(f, mask, mask, depth_mask); + } else { + spe_and(f, mask, mask, depth_mask); + } + } + + if (dsa->depth.writemask) { + spe_selb(f, depth, depth, zvals, mask); + } + + spe_bi(f, 0, 0, 0); +} + + +/** + * \note Emits a maximum of 3 instructions + */ +static int +emit_alpha_factor_calculation(struct spe_function *f, + unsigned factor, float const_alpha, + int src_alpha, int dst_alpha) +{ + union { + float f; + unsigned u; + } alpha; + int factor_reg; + int tmp; + + + alpha.f = const_alpha; + + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_or(f, factor_reg, src_alpha, src_alpha); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor_reg = dst_alpha; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + const_alpha = 1.0 - const_alpha; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_il(f, factor_reg, alpha.u & 0x0ffff); + spe_ilh(f, factor_reg, alpha.u >> 16); + break; + + case PIPE_BLENDFACTOR_ZERO: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, src_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, dst_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + factor_reg = -1; + break; + } + + return factor_reg; +} + + +/** + * \note Emits a maximum of 5 instructions + */ +static void +emit_color_factor_calculation(struct spe_function *f, + unsigned sF, unsigned mask, + const struct pipe_blend_color *blend_color, + const int *src, + const int *dst, + int *factor) +{ + union { + float f[4]; + unsigned u[4]; + } color; + int tmp; + unsigned i; + + + color.f[0] = blend_color->color[0]; + color.f[1] = blend_color->color[1]; + color.f[2] = blend_color->color[2]; + color.f[3] = blend_color->color[3]; + + factor[0] = -1; + factor[1] = -1; + factor[2] = -1; + factor[3] = -1; + + switch (sF) { + case PIPE_BLENDFACTOR_ONE: + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_or(f, factor[i], src[i], src[i]); + } + } + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_or(f, factor[0], src[3], src[3]); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor[0] = dst[3]; + factor[1] = dst[3]; + factor[2] = dst[3]; + break; + + case PIPE_BLENDFACTOR_DST_COLOR: + factor[0] = dst[0]; + factor[1] = dst[1]; + factor[2] = dst[2]; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + /* Alpha saturate means min(As, 1-Ad). + */ + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, tmp, tmp, dst[3]); + spe_fcgt(f, factor[0], tmp, src[3]); + spe_selb(f, factor[0], src[3], tmp, factor[0]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + color.f[0] = 1.0 - color.f[0]; + color.f[1] = 1.0 - color.f[1]; + color.f[2] = 1.0 - color.f[2]; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_COLOR: + for (i = 0; i < 3; i++) { + factor[i] = spe_allocate_available_register(f); + + spe_il(f, factor[i], color.u[i] & 0x0ffff); + spe_ilh(f, factor[i], color.u[i] >> 16); + } + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + color.f[3] = 1.0 - color.f[3]; + /* FALLTHROUGH */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, factor[0], color.u[3] & 0x0ffff); + spe_ilh(f, factor[0], color.u[3] >> 16); + break; + + case PIPE_BLENDFACTOR_ZERO: + break; + + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, src[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, src[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, dst[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, dst[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + } +} + + +static void +emit_blend_calculation(struct spe_function *f, + unsigned func, unsigned sF, unsigned dF, + int src, int src_factor, int dst, int dst_factor) +{ + int tmp = spe_allocate_available_register(f); + + switch (func) { + case PIPE_BLEND_ADD: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fa(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fma(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, dst); + } + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fms(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_REVERSE_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, src); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, dst, src); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } + } else { + spe_fm(f, tmp, src, src_factor); + spe_fms(f, src, src, dst_factor, tmp); + } + break; + + case PIPE_BLEND_MIN: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, dst, src, tmp); + break; + + case PIPE_BLEND_MAX: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, src, dst, tmp); + break; + + default: + assert(0); + } + + spe_release_register(f, tmp); +} + + +/** + * Generate code to perform alpha blending on the SPE + */ +void +cell_generate_alpha_blend(struct cell_blend_state *cb, + const struct pipe_blend_color *blend_color) +{ + struct pipe_blend_state *const b = &cb->base; + struct spe_function *const f = &cb->code; + + /* This code generates a maximum of 3 (source alpha factor) + * + 3 (destination alpha factor) + (3 * 5) (source color factor) + * + (3 * 5) (destination color factor) + (4 * 2) (blend equation) + * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to + * make it a happy power-of-two. + */ + spe_init_func(f, 4 * 64); + + + const int frag[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + const int pixel[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + const int mask = spe_allocate_register(f, 11); + unsigned func[4]; + unsigned sF[4]; + unsigned dF[4]; + unsigned i; + int src_factor[4]; + int dst_factor[4]; + + + /* Does the selected blend mode make use of the source / destination + * color (RGB) blend factors? + */ + boolean need_color_factor = b->blend_enable + && (b->rgb_func != PIPE_BLEND_MIN) + && (b->rgb_func != PIPE_BLEND_MAX); + + /* Does the selected blend mode make use of the source / destination + * alpha blend factors? + */ + boolean need_alpha_factor = b->blend_enable + && (b->alpha_func != PIPE_BLEND_MIN) + && (b->alpha_func != PIPE_BLEND_MAX); + + + sF[0] = b->rgb_src_factor; + sF[1] = sF[0]; + sF[2] = sF[0]; + sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor; + + dF[0] = b->rgb_dst_factor; + dF[1] = dF[0]; + dF[2] = dF[0]; + dF[3] = b->rgb_dst_factor; + + + /* If alpha writing is enabled and the alpha blend mode requires use of + * the alpha factor, calculate the alpha factor. + */ + if (((b->colormask & 8) != 0) && need_alpha_factor) { + src_factor[3] = emit_alpha_factor_calculation(f, sF[3], + blend_color->color[3], + frag[3], pixel[3]); + + /* If the alpha destination blend factor is the same as the alpha source + * blend factor, re-use the previously calculated value. + */ + dst_factor[3] = (dF[3] == sF[3]) + ? src_factor[3] + : emit_alpha_factor_calculation(f, dF[3], + blend_color->color[3], + frag[3], pixel[3]); + } + + + if (sF[0] == sF[3]) { + src_factor[0] = src_factor[3]; + src_factor[1] = src_factor[3]; + src_factor[2] = src_factor[3]; + } else if (sF[0] == dF[3]) { + src_factor[0] = dst_factor[3]; + src_factor[1] = dst_factor[3]; + src_factor[2] = dst_factor[3]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_src_factor, + b->colormask, + blend_color, + frag, pixel, src_factor); + } + + + if (dF[0] == sF[3]) { + dst_factor[0] = src_factor[3]; + dst_factor[1] = src_factor[3]; + dst_factor[2] = src_factor[3]; + } else if (dF[0] == dF[3]) { + dst_factor[0] = dst_factor[3]; + dst_factor[1] = dst_factor[3]; + dst_factor[2] = dst_factor[3]; + } else if (dF[0] == sF[0]) { + dst_factor[0] = src_factor[0]; + dst_factor[1] = src_factor[1]; + dst_factor[2] = src_factor[2]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_dst_factor, + b->colormask, + blend_color, + frag, pixel, dst_factor); + } + + + + func[0] = b->rgb_func; + func[1] = func[0]; + func[2] = func[0]; + func[3] = b->alpha_func; + + for (i = 0; i < 4; ++i) { + if ((b->colormask & (1U << i)) != 0) { + emit_blend_calculation(f, + func[i], sF[i], dF[i], + frag[i], src_factor[i], + pixel[i], dst_factor[i]); + spe_selb(f, frag[i], pixel[i], frag[i], mask); + } else { + spe_or(f, frag[i], pixel[i], pixel[i]); + } + } + + spe_bi(f, 0, 0, 0); +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h new file mode 100644 index 0000000000..541c3b3be0 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -0,0 +1,35 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef CELL_STATE_PER_FRAGMENT_H +#define CELL_STATE_PER_FRAGMENT_H + +extern void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); + +extern void +cell_generate_alpha_blend(struct cell_blend_state *cb, + const struct pipe_blend_color *blend_color); + +#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index c071de1900..115ca8cd90 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -19,6 +19,7 @@ SOURCES = \ spu_main.c \ spu_blend.c \ spu_dcache.c \ + spu_per_fragment_op.c \ spu_render.c \ spu_texture.c \ spu_tile.c \ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 59300028d4..8e46f6e471 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -58,6 +58,9 @@ struct spu_vs_context draw; static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] ALIGN16_ATTRIB; +static unsigned char depth_stencil_code_buffer[4 * 64] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -248,14 +251,26 @@ cmd_state_blend(const struct pipe_blend_state *state) static void -cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) +cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state) { if (Debug) printf("SPU %u: DEPTH_STENCIL: ztest %d\n", spu.init.id, - state->depth.enabled); + state->read_depth); + + ASSERT_ALIGN16(state->base); + + mfc_get(depth_stencil_code_buffer, + (unsigned int) state->base, /* src */ + ROUNDUP16(state->size), + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); - memcpy(&spu.depth_stencil, state, sizeof(*state)); + spu.frag_test = (frag_test_func) depth_stencil_code_buffer; + spu.read_depth = state->read_depth; + spu.read_stencil = state->read_stencil; } @@ -415,9 +430,9 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); break; case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) + cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); break; case CELL_CMD_STATE_SAMPLER: cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index a13edd1702..444e218645 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -56,6 +56,17 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ +struct spu_frag_test_results { + qword mask; + qword depth; + qword stencil; +}; + +typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, + qword pixel_depth, qword pixel_stencil, qword frag_depth, + qword frag_alpha, qword facing); + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -79,8 +90,9 @@ struct spu_global struct cell_init_info init; struct spu_framebuffer fb; - struct pipe_blend_state blend_stencil; - struct pipe_depth_stencil_alpha_state depth_stencil; + boolean read_depth; + boolean read_stencil; + frag_test_func frag_test; struct pipe_blend_state blend; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct cell_command_texture texture; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c new file mode 100644 index 0000000000..d42b522b41 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -0,0 +1,191 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file spu_per_fragment_op.c + * SPU implementation various per-fragment operations. + * + * \author Ian Romanick + */ + +#include "pipe/p_format.h" +#include "spu_main.h" +#include "spu_per_fragment_op.h" + +#define ZERO 0x80 + +static void +read_ds_quad(tile_t *buffer, unsigned x, unsigned y, + enum pipe_format depth_format, qword *depth, + qword *stencil) +{ + const int ix = x / 2; + const int iy = y / 2; + + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: { + qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + + const qword shuf_vec = (qword) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 + }; + + + /* At even X values we want the first 4 shorts, and at odd X values we + * want the second 4 shorts. + */ + qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3)); + qword bias_mask = si_fsmbi(0x3333); + qword sv = si_a(shuf_vec, si_and(bias_mask, bias)); + + *depth = si_shufb(*ptr, *ptr, sv); + *stencil = si_il(0); + break; + } + + + case PIPE_FORMAT_Z32_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + + *depth = *ptr; + *stencil = si_il(0); + break; + } + + + case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0x7777); + + *depth = si_and(*ptr, mask); + *stencil = si_rotmai(si_andc(*ptr, mask), -24); + break; + } + + + default: + assert(0); + break; + } +} + + +static void +write_ds_quad(tile_t *buffer, unsigned x, unsigned y, + enum pipe_format depth_format, + qword depth, qword stencil) +{ + const int ix = x / 2; + const int iy = y / 2; + + (void) stencil; + + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: { + qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; + + qword sv = ((ix & 0x01) == 0) + ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31 } + : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15 }; + *ptr = si_shufb(depth, *ptr, sv); + break; + } + + + case PIPE_FORMAT_Z32_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + *ptr = depth; + break; + } + + + case PIPE_FORMAT_Z24S8_UNORM: { + qword *ptr = (qword *) &buffer->ui4[iy][ix]; + qword mask = si_fsmbi(0x7777); + + stencil = si_rotmai(stencil, 24); + *ptr = si_selb(stencil, depth, mask); + break; + } + + + default: + assert(0); + break; + } +} + + +qword +spu_do_depth_stencil(int x, int y, + qword frag_mask, qword frag_depth, qword frag_alpha, + qword facing) +{ + struct spu_frag_test_results result; + qword pixel_depth; + qword pixel_stencil; + + /* All of this preable code (everthing before the call to frag_test) should + * be generated on the PPU and upload to the SPU. + */ + if (spu.read_depth || spu.read_stencil) { + read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, + &pixel_depth, &pixel_stencil); + } + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z16_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + case PIPE_FORMAT_Z32_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + case PIPE_FORMAT_Z24S8_UNORM: + frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); + frag_depth = si_cfltu(frag_depth, 0); + break; + default: + assert(0); + break; + } + + result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, + frag_depth, frag_alpha, facing); + + + /* This code (everthing after the call to frag_test) should + * be generated on the PPU and upload to the SPU. + */ + if (spu.read_depth || spu.read_stencil) { + write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, + result.depth, result.stencil); + } + + return result.mask; +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h new file mode 100644 index 0000000000..6571258699 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -0,0 +1,32 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SPU_PER_FRAGMENT_OP +#define SPU_PER_FRAGMENT_OP + +extern qword +spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, + qword frag_alpha, qword facing); + +#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 20e77aa2e6..6df59abd36 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -98,7 +98,7 @@ my_tile(uint tx, uint ty) static INLINE void get_cz_tiles(uint tx, uint ty) { - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); @@ -153,7 +153,7 @@ static INLINE void wait_put_cz_tiles(void) { wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { wait_on_mask(1 << TAG_WRITE_TILE_Z); } } diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index be9624cf7d..81823f2463 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -38,8 +38,7 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" - -#include "spu_ztest.h" +#include "spu_per_fragment_op.h" /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ @@ -264,16 +263,12 @@ do_depth_test(int x, int y, mask_t quadmask) zvals.v = eval_z((float) x, (float) y); - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - int ix = (x - setup.cliprect_minx) / 4; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); - } - else { - int ix = (x - setup.cliprect_minx) / 2; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); - } + mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, + y - setup.cliprect_miny, + (qword) quadmask, + (qword) zvals.v, + (qword) spu_splats((unsigned char) 0x0ffu), + (qword) spu_splats((unsigned int) 0x01u)); if (spu_extract(spu_orx(mask), 0)) spu.cur_ztile_status = TILE_STATUS_DIRTY; @@ -299,7 +294,7 @@ emit_quad( int x, int y, mask_t mask ) sp->quad.first->run(sp->quad.first, &setup.quad); #else - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { mask = do_depth_test(x, y, mask); } @@ -434,7 +429,7 @@ static void flush_spans( void ) } ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - if (spu.depth_stencil.depth.enabled) { + if (spu.read_depth) { if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ztile\n", spu.init.id); diff --git a/src/gallium/drivers/cell/spu/spu_ztest.h b/src/gallium/drivers/cell/spu/spu_ztest.h deleted file mode 100644 index ce8ad00339..0000000000 --- a/src/gallium/drivers/cell/spu/spu_ztest.h +++ /dev/null @@ -1,135 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Zbuffer/depth test code. - */ - - -#ifndef SPU_ZTEST_H -#define SPU_ZTEST_H - - -#ifdef __SPU__ -#include -#endif - - - -/** - * Perform Z testing for a 16-bit/value Z buffer. - * - * \param zvals vector of four fragment zvalues as floats - * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this - * contains the Z values for 2 quads, 8 pixels. - * \param x x coordinate of quad (only lsbit is significant) - * \param inMask indicates which fragments in the quad are alive - * \return new mask indicating which fragments are alive after ztest - */ -static INLINE vector unsigned int -spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, - uint x, vector unsigned int inMask) -{ -#define ZERO 0x80 - vector unsigned int zvals_ui4, zbuf_ui4, mask; - - /* convert floats to uints in [0, 65535] */ - zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ - zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ - - /* XXX this conditional could be removed with a bit of work */ - if (x & 1) { - /* convert zbuffer values from ushorts to uints */ - /* gather lower four ushorts */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, - ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); - /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 16, 17, 18, 19, 20, 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15})); - } - else { - /* convert zbuffer values from ushorts to uints */ - /* gather upper four ushorts */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); - /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31})); - } - return mask; -#undef ZERO -} - - -/** - * As above, but Zbuffer values as 32-bit uints - */ -static INLINE vector unsigned int -spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, - vector unsigned int inMask) -{ - vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; - - /* convert floats to uints in [0, 0xffffffff] */ - zvals_ui4 = spu_convtu(zvals, 32); - /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ - mask = spu_cmpgt(zbuf, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); - - return mask; -} - - -#endif /* SPU_ZTEST_H */ -- cgit v1.2.3 From 6ba9fb9b6693904054ad4e1506ba42e324334b0a Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Aug 2008 11:31:59 -0600 Subject: cell: asst fixes to get driver building/running again. Note that SPU vertex transformation is disabled at this time. --- src/gallium/drivers/cell/ppu/cell_clear.c | 4 +- src/gallium/drivers/cell/ppu/cell_context.c | 4 ++ src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 35 ++++++++++-- src/gallium/drivers/cell/ppu/cell_draw_arrays.h | 24 ++++++-- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 6 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 14 ++--- src/gallium/drivers/cell/ppu/cell_state_shader.c | 4 +- src/gallium/drivers/cell/ppu/cell_surface.c | 11 ++-- src/gallium/drivers/cell/ppu/cell_texture.c | 69 ++++++++++++++++++++--- src/gallium/drivers/cell/ppu/cell_texture.h | 1 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 + src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 4 ++ src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 5 ++ src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_tri.c | 1 + src/gallium/drivers/cell/spu/spu_util.c | 2 + src/gallium/drivers/cell/spu/spu_vertex_shader.c | 26 ++++++++- 18 files changed, 176 insertions(+), 39 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index cee0917b63..a421c95c8e 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -48,6 +48,7 @@ void cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { + struct pipe_screen *screen = pipe->screen; struct cell_context *cell = cell_context(pipe); uint surfIndex; @@ -56,7 +57,8 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, if (!cell->cbuf_map[0]) - cell->cbuf_map[0] = pipe_surface_map(ps); + cell->cbuf_map[0] = screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_GPU_WRITE); if (ps == cell->framebuffer.zsbuf) { surfIndex = 1; diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 5af95a3c10..9ff4e86943 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -73,11 +73,13 @@ cell_draw_create(struct cell_context *cell) { struct draw_context *draw = draw_create(); +#if 0 /* broken */ if (getenv("GALLIUM_CELL_VS")) { /* plug in SPU-based vertex transformation code */ draw->shader_queue_flush = cell_vertex_shader_queue_flush; draw->driver_private = cell; } +#endif return draw; } @@ -108,6 +110,8 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 6e08cf6fe8..f02dffe124 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,8 @@ cell_map_constant_buffers(struct cell_context *sp) } draw_set_mapped_constant_buffer(sp->draw, - sp->mapped_constants[PIPE_SHADER_VERTEX]); + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); } static void @@ -92,10 +93,12 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode, * XXX should the element buffer be specified/bound with a separate function? */ boolean -cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) { struct cell_context *sp = cell_context(pipe); struct draw_context *draw = sp->draw; @@ -152,3 +155,25 @@ cell_draw_elements(struct pipe_context *pipe, return TRUE; } + + +boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + + +void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct cell_context *cell = cell_context(pipe); + draw_set_edgeflags(cell->draw, edgeflags); +} diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h index d5df4aa05f..cd35ec17b4 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -29,14 +29,26 @@ #define CELL_DRAW_ARRAYS_H -boolean cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); +extern boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); -boolean cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); +extern boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +extern boolean +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); + +extern void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); #endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 971d65d09e..fe5437023b 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -294,6 +294,8 @@ cell_set_framebuffer_state(struct pipe_context *pipe, struct pipe_surface *csurf = fb->cbufs[0]; struct pipe_surface *zsurf = fb->zsbuf; uint i; + uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); /* unmap old surfaces */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { @@ -313,10 +315,10 @@ cell_set_framebuffer_state(struct pipe_context *pipe, /* map new surfaces */ if (csurf) - cell->cbuf_map[0] = pipe_surface_map(csurf); + cell->cbuf_map[0] = pipe_surface_map(csurf, flags); if (zsurf) - cell->zsbuf_map = pipe_surface_map(zsurf); + cell->zsbuf_map = pipe_surface_map(zsurf, flags); cell->dirty |= CELL_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 3646a0ee4f..9d88c1cf3d 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -162,13 +162,13 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw->num_vs_outputs; - info.declarations = (uintptr_t) draw->machine.Declarations; - info.num_declarations = draw->machine.NumDeclarations; - info.instructions = (uintptr_t) draw->machine.Instructions; - info.num_instructions = draw->machine.NumInstructions; - info.immediates = (uintptr_t) draw->machine.Imms; - info.num_immediates = draw->machine.ImmLimit / 4; + info.num_outputs = draw_num_vs_outputs(draw); + info.declarations = (uintptr_t) draw->vs.machine.Declarations; + info.num_declarations = draw->vs.machine.NumDeclarations; + info.instructions = (uintptr_t) draw->vs.machine.Instructions; + info.num_instructions = draw->vs.machine.NumInstructions; + info.immediates = (uintptr_t) draw->vs.machine.Imms; + info.num_immediates = draw->vs.machine.ImmLimit / 4; emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, & info, sizeof(info)); diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index cd96b317fa..86bcad05e9 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -41,7 +41,7 @@ static INLINE struct cell_fragment_shader_state * cell_fragment_shader_state(void *shader) { - return (struct pipe_shader_state *) shader; + return (struct cell_fragment_shader_state *) shader; } @@ -49,7 +49,7 @@ cell_fragment_shader_state(void *shader) static INLINE struct cell_vertex_shader_state * cell_vertex_shader_state(void *shader) { - return (struct pipe_shader_state *) shader; + return (struct cell_vertex_shader_state *) shader; } diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 2d31ad89a6..d9e3b510dc 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -26,11 +26,12 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_tile.h" + #include "cell_context.h" #include "cell_surface.h" @@ -46,12 +47,12 @@ cell_surface_copy(struct pipe_context *pipe, { assert( dst->cpp == src->cpp ); - pipe_copy_rect(pipe_surface_map(dst), + pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE), &dst->block, dst->stride, dstx, dsty, width, height, - pipe_surface_map(src), + pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ), do_flip ? -src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); @@ -81,7 +82,7 @@ cell_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE); assert(dst->stride > 0); diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 1add81373d..5a0942bbd6 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -33,8 +33,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/u_memory.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "cell_context.h" #include "cell_state.h" @@ -68,11 +69,13 @@ cell_texture_layout(struct cell_texture * spt) pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + spt->stride[level] = pt->nblocksx[level] * pt->block.size; + spt->level_offset[level] = spt->buffer_size; spt->buffer_size += (pt->nblocksy[level] * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - pt->nblocksx[level] * pt->block.size; + pt->nblocksx[level] * pt->block.size); width = minify(width); height = minify(height); @@ -147,7 +150,8 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, static struct pipe_surface * cell_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); @@ -166,6 +170,10 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, ps->nblocksy = pt->nblocksy[level]; ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; + ps->usage = usage; + + /* XXX may need to override usage flags (see sp_texture.c) */ + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * @@ -228,10 +236,11 @@ cell_tile_texture(struct cell_context *cell, assert(w % TILE_SIZE == 0); assert(h % TILE_SIZE == 0); - surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice); + surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); ASSERT(surf); - src = (const uint *) pipe_surface_map(surf); + src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); if (texture->tiled_data) { align_free(texture->tiled_data); @@ -246,11 +255,12 @@ cell_tile_texture(struct cell_context *cell, } - void cell_update_texture_mapping(struct cell_context *cell) { +#if 0 uint face = 0, level = 0, zslice = 0; +#endif uint i; for (i = 0; i < CELL_MAX_SAMPLERS; i++) { @@ -275,10 +285,52 @@ cell_update_texture_mapping(struct cell_context *cell) } +static void * +cell_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + if (map == NULL) + return NULL; + + /* May want to different things here depending on read/write nature + * of the map: + */ + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify sharing contexts of a texture change. + * In softpipe, that would mean flushing the texture cache. + */ +#if 0 + cell_screen(screen)->timestamp++; +#endif + } + + return map + surface->offset; +} + + +static void +cell_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); +} + + void cell_init_texture_functions(struct cell_context *cell) { - cell->pipe.texture_update = cell_texture_update; + /*cell->pipe.texture_update = cell_texture_update;*/ } void @@ -287,4 +339,7 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen) screen->texture_create = cell_texture_create_screen; screen->texture_release = cell_texture_release_screen; screen->get_tex_surface = cell_get_tex_surface_screen; + + screen->surface_map = cell_surface_map; + screen->surface_unmap = cell_surface_unmap; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index fcee069d05..6d37e95ebc 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -41,6 +41,7 @@ struct cell_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index 3a181b585c..e4230c7a5f 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -37,6 +37,7 @@ #include "cell_spu.h" #include "cell_vbuf.h" #include "draw/draw_vbuf.h" +#include "util/u_memory.h" /** Allow vertex data to be inlined after RENDER command */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 49d5443cde..2ece0250f6 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -260,6 +260,7 @@ emit_fetch(struct spe_function *p, void cell_update_vertex_fetch(struct draw_context *draw) { +#if 0 struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct spe_function *p = &cell->attrib_fetch; @@ -337,4 +338,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) cell->attrib_fetch_offsets[function_index[i]]; } } +#else + assert(0); +#endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index f753960a0f..3658947715 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -50,6 +51,7 @@ void cell_vertex_shader_queue_flush(struct draw_context *draw) { +#if 0 struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct cell_command_vs *const vs = &cell_global.command[0].vs; @@ -138,4 +140,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; +#else + assert(0); +#endif } diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index c68f78f59b..8605679940 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -99,7 +99,7 @@ struct spu_exec_machine * 1 address */ struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_ADDRS + 1] + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] ALIGN16_ATTRIB; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 6df59abd36..305dc98881 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -35,7 +35,7 @@ #include "spu_tri.h" #include "spu_tile.h" #include "cell/common.h" - +#include "util/u_memory.h" /** diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 8944ef171e..2a4e0b423c 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -32,6 +32,7 @@ #include #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_math.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index dbcf4b0eb9..b25ca4eafc 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,6 @@ + #include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" #include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index a1e81975e6..f81d19fea1 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -36,13 +36,35 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "spu_vertex_shader.h" -#include "spu_exec.h" +#include "util/u_math.h" #include "draw/draw_private.h" #include "draw/draw_context.h" #include "cell/common.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" #include "spu_main.h" + +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +#define CLIP_RIGHT_BIT 0x01 +#define CLIP_LEFT_BIT 0x02 +#define CLIP_TOP_BIT 0x04 +#define CLIP_BOTTOM_BIT 0x08 +#define CLIP_FAR_BIT 0x10 +#define CLIP_NEAR_BIT 0x20 + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + static INLINE unsigned compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) { -- cgit v1.2.3 From afaa53040bd01ca86762e7d7b1a5a65810767921 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Fri, 3 Oct 2008 18:00:43 -0600 Subject: CELL: changes to generate SPU code for stenciling This set of code changes are for stencil code generation support. Both one-sided and two-sided stenciling are supported. In addition to the raw code generation changes, these changes had to be made elsewhere in the system: - Added new "register set" feature to the SPE assembly generation. A "register set" is a way to allocate multiple registers and free them all at the same time, delegating register allocation management to the spe_function unit. It's quite useful in complex register allocation schemes (like stenciling). - Added and improved SPE macro calculations. These are operations between registers and unsigned integer immediates. In many cases, the calculation can be performed with a single instruction; the macros will generate the single instruction if possible, or generate a register load and register-to-register operation if not. These macro functions are: spe_load_uint() (which has new ways to load a value in a single instruction), spe_and_uint(), spe_xor_uint(), spe_compare_equal_uint(), and spe_compare_greater_uint(). - Added facing to fragment generation. While rendering, the rasterizer needs to be able to determine front- and back-facing fragments, in order to correctly apply two-sided stencil. That requires these changes: - Added front_winding field to the cell_command_render block, so that the state tracker could communicate to the rasterizer what it considered to be the front-facing direction. - Added fragment facing as an input to the fragment function. - Calculated facing is passed during emit_quad(). --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 246 +++++- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 41 +- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 881 ++++++++++++++++++--- src/gallium/drivers/cell/ppu/cell_render.c | 1 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 + src/gallium/drivers/cell/spu/spu_main.h | 3 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 19 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 3 +- src/gallium/drivers/cell/spu/spu_render.c | 4 +- src/gallium/drivers/cell/spu/spu_tri.c | 35 +- src/gallium/drivers/cell/spu/spu_tri.h | 2 +- 12 files changed, 1091 insertions(+), 146 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 491141f190..8a87e9abb1 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -359,14 +359,21 @@ void _name (struct spe_function *p, int imm) \ */ void spe_init_func(struct spe_function *p, unsigned code_size) { + register unsigned int i; + p->store = align_malloc(code_size, 16); p->num_inst = 0; p->max_inst = code_size / SPE_INST_SIZE; + p->set_count = 0; + memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ - p->regs[0] = ~7; - p->regs[1] = (1U << (80 - 64)) - 1; + p->regs[0] = p->regs[1] = p->regs[2] = 1; + for (i = 80; i <= 127; i++) { + p->regs[i] = 1; + } p->print = false; p->indent = 0; @@ -398,12 +405,8 @@ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < SPE_NUM_REGS; i++) { - const uint64_t mask = (1ULL << (i % 64)); - const unsigned idx = i / 64; - - assert(idx < 2); - if ((p->regs[idx] & mask) != 0) { - p->regs[idx] &= ~mask; + if (p->regs[i] == 0) { + p->regs[i] = 1; return i; } } @@ -417,31 +420,68 @@ int spe_allocate_available_register(struct spe_function *p) */ int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) != 0); - - p->regs[idx] &= ~(1ULL << bit); + assert(p->regs[reg] == 0); + p->regs[reg] = 1; return reg; } /** - * Mark the given SPE register as "unallocated". + * Mark the given SPE register as "unallocated". Note that this should + * only be used on registers allocated in the current register set; an + * assertion will fail if an attempt is made to deallocate a register + * allocated in an earlier register set. */ void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 1); - assert(idx < 2); + p->regs[reg] = 0; +} - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) == 0); +/** + * Start a new set of registers. This can be called if + * it will be difficult later to determine exactly what + * registers were actually allocated during a code generation + * sequence, and you really just want to deallocate all of them. + */ +void spe_allocate_register_set(struct spe_function *p) +{ + register unsigned int i; + + /* Keep track of the set count. If it ever wraps around to 0, + * we're in trouble. + */ + p->set_count++; + assert(p->set_count > 0); + + /* Increment the allocation count of all registers currently + * allocated. Then any registers that are allocated in this set + * will be the only ones with a count of 1; they'll all be released + * when the register set is released. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) p->regs[i]++; + } +} + +void spe_release_register_set(struct spe_function *p) +{ + unsigned int i; + + /* If the set count drops below zero, we're in trouble. */ + assert(p->set_count > 0); + p->set_count--; - p->regs[idx] |= (1ULL << bit); + /* Drop the allocation level of all registers. Any allocated + * during this register set will drop to 0 and then become + * available. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) p->regs[i]--; + } } @@ -603,8 +643,10 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) { /* If the whole value is in the lower 18 bits, use ila, which * doesn't sign-extend. Otherwise, if the two halfwords of - * the constant are identical, use ilh. Otherwise, we have - * to use ilhu followed by iohl. + * the constant are identical, use ilh. Otherwise, if every byte of + * the desired value is 0x00 or 0xff, we can use Form Select Mask for + * Bytes Immediate (fsmbi) to load the value in a single instruction. + * Otherwise, in the general case, we have to use ilhu followed by iohl. */ if ((ui & 0xfffc0000) == ui) { spe_ila(p, rT, ui); @@ -612,13 +654,171 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) else if ((ui >> 16) == (ui & 0xffff)) { spe_ilh(p, rT, ui & 0xffff); } + else if ( + ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && + ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && + ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && + ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) + ) { + unsigned int mask = 0; + /* fsmbi duplicates each bit in the given mask eight times, + * using a 16-bit value to initialize a 16-byte quadword. + * Each 4-bit nybble of the mask corresponds to a full word + * of the result; look at the value and figure out the mask + * (replicated for each word in the quadword), and then + * form the "select mask" to get the value. + */ + if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; + if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; + if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; + if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; + spe_fsmbi(p, rT, mask); + } else { + /* The general case: this usually uses two instructions, but + * may use only one if the low-order 16 bits of each word are 0. + */ spe_ilhu(p, rT, ui >> 16); if (ui & 0xffff) spe_iohl(p, rT, ui & 0xffff); } } +/* This function is constructed identically to spe_sor_uint() below. + * Changes to one should be made in the other. + */ +void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either And Byte Immediate + * (which uses the same constant across each byte), And Halfword Immediate + * (which sign-extends a 10-bit immediate to 16 bits and uses that + * across each halfword), or And Word Immediate (which sign-extends + * a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + register unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use And Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_andi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use And Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_andhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the And Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_andbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_and(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +/* This function is constructed identically to spe_and_uint() above. + * Changes to one should be made in the other. + */ +void spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either Exclusive Or Byte + * Immediate (which uses the same constant across each byte), Exclusive + * Or Halfword Immediate (which sign-extends a 10-bit immediate to + * 16 bits and uses that across each halfword), or Exclusive Or Word + * Immediate (which sign-extends a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + register unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use Exclusive Or Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_xori(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use Exclusive Or Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_xorhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the Exclusive Or Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_xorbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_xor(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 9 bits or less, it fits inside a + * Compare Equal Word Immediate instruction. + */ + if ((ui & 0x000001ff) == ui) { + spe_ceqi(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_ceq(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 10 bits or less, it fits inside a + * Compare Logical Greater Than Word Immediate instruction. + */ + if ((ui & 0x000003ff) == ui) { + spe_clgti(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_clgt(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 61c7edeb60..cd2e245409 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -53,17 +53,26 @@ struct spe_function uint num_inst; uint max_inst; - /** - * Mask of used / unused registers - * - * Each set bit corresponds to an available register. Each cleared bit - * corresponds to an allocated register. + /** + * The "set count" reflects the number of nested register sets + * are allowed. In the unlikely case that we exceed the set count, + * register allocation will start to be confused, which is critical + * enough that we check for it. + */ + unsigned char set_count; + + /** + * Flags for used and unused registers. Each byte corresponds to a + * register; a 0 in that byte means that the register is available. + * A value of 1 means that the register was allocated in the current + * register set. Any other value N means that the register was allocated + * N register sets ago. * * \sa * spe_allocate_register, spe_allocate_available_register, - * spe_release_register + * spe_allocate_register_set, spe_release_register_set, spe_release_register, */ - uint64_t regs[SPE_NUM_REGS / 64]; + unsigned char regs[SPE_NUM_REGS]; boolean print; /**< print/dump instructions as they're emitted? */ int indent; /**< number of spaces to indent */ @@ -77,6 +86,8 @@ extern unsigned spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_allocate_register_set(struct spe_function *p); +extern void spe_release_register_set(struct spe_function *p); extern void spe_print_code(struct spe_function *p, boolean enable); extern void spe_indent(struct spe_function *p, int spaces); @@ -307,6 +318,22 @@ spe_load_int(struct spe_function *p, unsigned rT, int i); extern void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); +/** And immediate value into rT. */ +extern void +spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Xor immediate value into rT. */ +extern void +spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare equal with immediate value. */ +extern void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare greater with immediate value. */ +extern void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + /** Replicate word 0 of rA across rT. */ extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 99329fd8e2..c223bc1744 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -227,6 +227,7 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; + uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */ }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 653afc235d..f920ae13b4 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -54,10 +54,12 @@ * \param ifragZ_reg register containing integer fragment Z values (in) * \param ifbZ_reg register containing integer frame buffer Z values (in/out) * \param zmask_reg register containing result of Z test/comparison (out) + * + * Returns true if the Z-buffer needs to be updated. */ -static void -gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, +static boolean +gen_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) { /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ @@ -132,7 +134,10 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; */ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + return true; } + + return false; } @@ -238,22 +243,34 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, * it and have to allocate and load it again unnecessarily. */ static inline void -setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int *r) { if (*is_already_set) return; *r = spe_allocate_available_register(f); - spe_load_float(f, *r, value); - *is_already_set = true; } static inline void -release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +release_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int r) { if (!*is_already_set) return; spe_release_register(f, r); *is_already_set = false; } +static inline void +setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +{ + if (*is_already_set) return; + setup_optional_register(f, is_already_set, r); + spe_load_float(f, *r, value); +} + +static inline void +release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + release_optional_register(f, is_already_set, r); +} + /** * Generate SPE code to implement the given blend mode for a quad of pixels. * \param f SPE function to append instruction onto. @@ -1117,6 +1134,633 @@ gen_colormask(struct spe_function *f, spe_release_register(f, colormask_reg); } +/* This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value. As such, we have + * access to the Compare Immediate instructions where we don't in + * gen_depth_test(), which is what makes us very different. + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test. The bitmask of valid + * fragments that failed would be found in (mask_reg & ~stencil_pass_reg). + */ +static void +gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, + unsigned int mask_reg, unsigned int fbS_reg, + unsigned int stencil_pass_reg) +{ + /* Generate code that puts the set of passing fragments into the stencil_pass_reg + * register, taking into account whether each fragment was active to begin with. + */ + switch (state->func) { + case PIPE_FUNC_EQUAL: + /* stencil_pass = mask & (s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + /* stencil_fail = mask & ~stencil_pass */ + break; + + case PIPE_FUNC_NOTEQUAL: + /* stencil_pass = mask & ~(s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GREATER: + /* stencil_pass = mask & (s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_LESS: { + /* stencil_pass = mask & (reference > s) */ + /* There's no convenient Compare Less Than Immediate instruction, so + * we'll have to do this one the harder way, by loading a register and + * comparing directly. Compare Logical Greater Than Word (clgt) + * treats its operands as unsigned - no sign extension. + */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_LEQUAL: + /* stencil_pass = mask & (s <= reference) = mask & ~(s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GEQUAL: { + /* stencil_pass = mask & (s >= reference) = mask & ~(reference > s) */ + /* As above, we have to do this by loading a register */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_NEVER: + /* stencil_pass = mask & 0 = 0 */ + spe_load_uint(f, stencil_pass_reg, 0); + spe_move(f, stencil_pass_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* stencil_pass = mask & 1 = mask */ + spe_move(f, stencil_pass_reg, mask_reg); + break; + } + + /* The fragments that passed the stencil test are now in stencil_pass_reg. + * The fragments that failed would be (mask_reg & ~stencil_pass_reg). + */ +} + +/* This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply. It does not + * apply any tests. It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. + */ +static void +gen_stencil_values(struct spe_function *f, unsigned int stencil_op, + unsigned int stencil_ref_value, unsigned int stencil_max_value, + unsigned int fbS_reg, unsigned int newS_reg) +{ + /* The code below assumes that newS_reg and fbS_reg are not the same + * register; if they can be, the calculations below will have to use + * an additional temporary register. For now, mark the assumption + * with an assertion that will fail if they are the same. + */ + ASSERT(fbS_reg != newS_reg); + + /* The code also assumes the the stencil_max_value is of the form + * 2^n-1 and can therefore be used as a mask for the valid bits in + * addition to a maximum. Make sure this is the case as well. + * The clever math below exploits the fact that incrementing a + * binary number serves to flip all the bits of a number starting at + * the LSB and continuing to (and including) the first zero bit + * found. That means that a number and its increment will always + * have at least one bit in common (the high order bit, if nothing + * else) *unless* the number is zero, *or* the number is of a form + * consisting of some number of 1s in the low-order bits followed + * by nothing but 0s in the high-order bits. The latter case + * implies it's of the form 2^n-1. + */ + ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + + switch(stencil_op) { + case PIPE_STENCIL_OP_KEEP: + /* newS = S */ + spe_move(f, newS_reg, fbS_reg); + break; + + case PIPE_STENCIL_OP_ZERO: + /* newS = 0 */ + spe_zero(f, newS_reg); + break; + + case PIPE_STENCIL_OP_REPLACE: + /* newS = stencil reference value */ + spe_load_uint(f, newS_reg, stencil_ref_value); + break; + + case PIPE_STENCIL_OP_INCR: { + /* newS = (s == max ? max : s + 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); + /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ + spe_ai(f, newS_reg, fbS_reg, 1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_DECR: { + /* newS = (s == 0 ? 0 : s - 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); + /* Add Word Immediate with a (-1) value works */ + spe_ai(f, newS_reg, fbS_reg, -1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_INCR_WRAP: + /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can + * do a normal add and mask off the correct bits + */ + spe_ai(f, newS_reg, fbS_reg, 1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_DECR_WRAP: + /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ + spe_ai(f, newS_reg, fbS_reg, -1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_INVERT: + /* newS = ~s. We take advantage of the mask/max value to invert only + * the valid bits for the field so we don't have to do an extra "and". + */ + spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); + break; + + default: + ASSERT(0); + } +} + + +/* This function generates code to get all the necessary possible + * stencil values. For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, + unsigned int fbS_reg, + unsigned int *fail_reg, unsigned int *zfail_reg, + unsigned int *zpass_reg, unsigned int *back_fail_reg, + unsigned int *back_zfail_reg, unsigned int *back_zpass_reg) +{ + unsigned zfail_op, back_zfail_op; + + /* Stenciling had better be enabled here */ + ASSERT(dsa->stencil[0].enabled); + + /* If the depth test is not enabled, it is treated as though it always + * passes. In particular, that means that the "zfail_op" (and the backfacing + * counterpart, if active) are not considered - a failing stencil test will + * trigger the "fail_op", and a passing stencil test will trigger the + * "zpass_op". + * + * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP, + * we keep them from being calculated. + */ + if (dsa->depth.enabled) { + zfail_op = dsa->stencil[0].zfail_op; + back_zfail_op = dsa->stencil[1].zfail_op; + } + else { + zfail_op = PIPE_STENCIL_OP_KEEP; + back_zfail_op = PIPE_STENCIL_OP_KEEP; + } + + /* One-sided or front-facing stencil */ + if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) { + *fail_reg = fbS_reg; + } + else { + *fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *fail_reg); + } + + if (zfail_op == PIPE_STENCIL_OP_KEEP) { + *zfail_reg = fbS_reg; + } + else if (zfail_op == dsa->stencil[0].fail_op) { + *zfail_reg = *fail_reg; + } + else { + *zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zfail_reg); + } + + if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) { + *zpass_reg = fbS_reg; + } + else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) { + *zpass_reg = *fail_reg; + } + else if (dsa->stencil[0].zpass_op == zfail_op) { + *zpass_reg = *zfail_reg; + } + else { + *zpass_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zpass_reg); + } + + /* If two-sided stencil is enabled, we have more work to do. */ + if (!dsa->stencil[1].enabled) { + /* This just flags that the registers need not be deallocated later */ + *back_fail_reg = fbS_reg; + *back_zfail_reg = fbS_reg; + *back_zpass_reg = fbS_reg; + } + else { + /* Same calculations as above, but for the back stencil */ + if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) { + *back_fail_reg = fbS_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) { + *back_fail_reg = *fail_reg; + } + else if (dsa->stencil[1].fail_op == zfail_op) { + *back_fail_reg = *zfail_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) { + *back_fail_reg = *zpass_reg; + } + else { + *back_fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_fail_reg); + } + + if (back_zfail_op == PIPE_STENCIL_OP_KEEP) { + *back_zfail_reg = fbS_reg; + } + else if (back_zfail_op == dsa->stencil[0].fail_op) { + *back_zfail_reg = *fail_reg; + } + else if (back_zfail_op == zfail_op) { + *back_zfail_reg = *zfail_reg; + } + else if (back_zfail_op == dsa->stencil[0].zpass_op) { + *back_zfail_reg = *zpass_reg; + } + else if (back_zfail_op == dsa->stencil[1].fail_op) { + *back_zfail_reg = *back_fail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zfail_reg); + } + + if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { + *back_zpass_reg = fbS_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) { + *back_zpass_reg = *fail_reg; + } + else if (dsa->stencil[1].zpass_op == zfail_op) { + *back_zpass_reg = *zfail_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) { + *back_zpass_reg = *zpass_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) { + *back_zpass_reg = *back_fail_reg; + } + else if (dsa->stencil[1].zpass_op == back_zfail_op) { + *back_zpass_reg = *back_zfail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zpass_reg); + } + } /* End of calculations for back-facing stencil */ +} + +static boolean +gen_stencil_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + const int const facing_reg, + const int mask_reg, const int fragZ_reg, + const int fbZ_reg, const int fbS_reg) +{ + /* True if we've generated code that could require writeback to the + * depth and/or stencil buffers + */ + boolean modified_buffers = false; + + boolean need_to_calculate_stencil_values; + boolean need_to_writemask_stencil_values; + + /* Registers. We may or may not actually allocate these, depending + * on whether the state values indicate that we need them. + */ + unsigned int stencil_pass_reg, stencil_fail_reg; + unsigned int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; + unsigned int stencil_writemask_reg; + unsigned int zmask_reg; + unsigned int newS_reg; + + /* Stenciling is quite complex: up to six different configurable stencil + * operations/calculations can be required (three each for front-facing + * and back-facing fragments). Many of those operations will likely + * be identical, so there's good reason to try to avoid calculating + * the same values more than once (which unfortunately makes the code less + * straightforward). + * + * To make register management easier, we start a new + * register set; we can release all the registers in the set at + * once, and avoid having to keep track of exactly which registers + * we allocate. We can still allocate and free registers as + * desired (if we know we no longer need a register), but we don't + * have to spend the complexity to track the more difficult variant + * register usage scenarios. + */ + spe_allocate_register_set(f); + + /* Calculate the writemask. If the writemask is trivial (either + * all 0s, meaning that we don't need to calculate any stencil values + * because they're not going to change the stencil anyway, or all 1s, + * meaning that we have to calculate the stencil values but do not + * need to mask them), we can avoid generating code. Don't forget + * that we need to consider backfacing stencil, if enabled. + */ + if (dsa->stencil[0].write_mask == 0x0 && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Trivial: don't need to calculate stencil values, and don't need to + * write them back to the framebuffer. + */ + need_to_calculate_stencil_values = false; + need_to_writemask_stencil_values = false; + } + else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Still trivial, but a little less so. We need to write the stencil + * values, but we don't need to mask them. + */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = false; + } + else { + /* The general case: calculate, mask, and write */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = true; + + /* While we're here, generate code that calculates what the + * writemask should be. If backface stenciling is enabled, + * and the backface writemask is not the same as the frontface + * writemask, we'll have to generate code that merges the + * two masks into a single effective mask based on fragment facing. + */ + stencil_writemask_reg = spe_allocate_available_register(f); + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); + if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { + unsigned int back_write_mask_reg = spe_allocate_available_register(f); + spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); + spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); + spe_release_register(f, back_write_mask_reg); + } + } + + /* At least one-sided stenciling must be on. Generate code that + * runs the stencil test on the basic/front-facing stencil, leaving + * the mask of passing stencil bits in stencil_pass_reg. This mask will + * be used both to mask the set of active pixels, and also to + * determine how the stencil buffer changes. + * + * This test will *not* change the value in mask_reg (because we don't + * yet know whether to apply the two-sided stencil or one-sided stencil). + */ + stencil_pass_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[0], mask_reg, fbS_reg, stencil_pass_reg); + + /* If two-sided stenciling is on, generate code to run the stencil + * test on the backfacing stencil as well, and combine the two results + * into the one correct result based on facing. + */ + if (dsa->stencil[1].enabled) { + unsigned int temp_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[1], mask_reg, fbS_reg, temp_reg); + spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); + spe_release_register(f, temp_reg); + } + + /* Generate code that, given the mask of valid fragments and the + * mask of valid fragments that passed the stencil test, computes + * the mask of valid fragments that failed the stencil test. We + * have to do this before we run a depth test (because the + * depth test should not be performed on fragments that failed the + * stencil test, and because the depth test will update the + * mask of valid fragments based on the results of the depth test). + */ + stencil_fail_reg = spe_allocate_available_register(f); + spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); + /* Now remove the stenciled-out pixels from the valid fragment mask, + * so we can later use the valid fragment mask in the depth test. + */ + spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + + /* We may not need to calculate stencil values, if the writemask is off */ + if (need_to_calculate_stencil_values) { + unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values; + unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values; + + /* Generate code that calculates exactly which stencil values we need, + * without calculating the same value twice (say, if two different + * stencil ops have the same value). This code will work for one-sided + * and two-sided stenciling (so that we take into account that operations + * may match between front and back stencils), and will also take into + * account whether the depth test is enabled (if the depth test is off, + * we don't need any of the zfail results, because the depth test always + * is considered to pass if it is disabled). Any register value that + * does not need to be calculated will come back with the same value + * that's in fbS_reg. + * + * This function will allocate a variant number of registers that + * will be released as part of the register set. + */ + gen_get_stencil_values(f, dsa, fbS_reg, + &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, + &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, + &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values); + + /* Tricky, tricky, tricky - the things we do to create optimal + * code... + * + * The various stencil values registers may overlap with each other + * and with fbS_reg arbitrarily (as any particular operation is + * only calculated once and stored in one register, no matter + * how many times it is used). So we can't change the values + * within those registers directly - if we change a value in a + * register that's being referenced by two different calculations, + * we've just unwittingly changed the second value as well... + * + * Avoid this by allocating new registers to hold the results + * (there may be 2, if the depth test is off, or 3, if it is on). + * These will be released as part of the register set. + */ + if (!dsa->stencil[1].enabled) { + /* The easy case: if two-sided stenciling is *not* enabled, we + * just use the front-sided values. + */ + stencil_fail_values = front_stencil_fail_values; + stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values; + stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; + } + else { /* two-sided stencil enabled */ + /* Allocate new registers for the needed merged values */ + stencil_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); + if (dsa->depth.enabled) { + stencil_pass_depth_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg); + } + else { + stencil_pass_depth_fail_values = fbS_reg; + } + stencil_pass_depth_pass_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg); + } + } + + /* We now have all the stencil values we need. We also need + * the results of the depth test to figure out which + * stencil values will become the new stencil values. (Even if + * we aren't actually calculating stencil values, we need to apply + * the depth test if it's enabled.) + * + * The code generated by gen_depth_test() returns the results of the + * test in the given register, but also alters the mask_reg based + * on the results of the test. + */ + if (dsa->depth.enabled) { + zmask_reg = spe_allocate_available_register(f); + modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + } + + if (need_to_calculate_stencil_values) { + /* If we need to writemask the stencil values before going into + * the stencil buffer, we'll have to use a new register to + * hold the new values. If not, we can just keep using the + * current register. + */ + if (need_to_writemask_stencil_values) { + newS_reg = spe_allocate_available_register(f); + spe_move(f, newS_reg, fbS_reg); + modified_buffers = true; + } + else { + newS_reg = fbS_reg; + } + + /* Merge in the selected stencil fail values */ + if (stencil_fail_values != fbS_reg) { + spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + } + + /* Same for the stencil pass/depth fail values. If this calculation + * is not needed (say, if depth test is off), then the + * stencil_pass_depth_fail_values register will be equal to fbS_reg + * and we'll skip the calculation. + */ + if (stencil_pass_depth_fail_values != fbS_reg) { + /* We don't actually have a stencil pass/depth fail mask yet. + * Calculate it here from the stencil passing mask and the + * depth passing mask. Note that zmask_reg *must* have been + * set above if we're here. + */ + unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f); + spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask); + + spe_release_register(f, stencil_pass_depth_fail_mask); + } + + /* Same for the stencil pass/depth pass mask */ + if (stencil_pass_depth_pass_values != fbS_reg) { + unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + + /* Almost done. If we need to writemask, do it now, leaving the + * results in the fbS_reg register passed in. If we don't need + * to writemask, then the results are *already* in the fbS_reg, + * so there's nothing more to do. + */ + + if (need_to_writemask_stencil_values) { + /* The Select Bytes command makes a fine writemask. Where + * the mask is 0, the first (original) values are retained, + * effectively masking out changes. Where the mask is 1, the + * second (new) values are retained, incorporating changes. + */ + spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); + } + } /* done calculating stencil values */ + + /* The stencil and/or depth values have been applied, and the + * mask_reg, fbS_reg, and fbZ_reg values have been updated. + * We're all done, except that we've allocated a fair number + * of registers that we didn't bother tracking. Release all + * those registers as part of the register set, and go home. + */ + spe_release_register_set(f); + + /* Return true if we could have modified the stencil and/or + * depth buffers. + */ + return modified_buffers; +} + + /** * Generate SPE code to implement the fragment operations (alpha test, * depth test, stencil test, blending, colormask, and final @@ -1156,6 +1800,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const int fragB_reg = 10; /* vector float */ const int fragA_reg = 11; /* vector float */ const int mask_reg = 12; /* vector uint */ + const int facing_reg = 13; /* uint */ /* offset of quad from start of tile * XXX assuming 4-byte pixels for color AND Z/stencil!!!! @@ -1183,6 +1828,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_allocate_register(f, fragB_reg); spe_allocate_register(f, fragA_reg); spe_allocate_register(f, mask_reg); + spe_allocate_register(f, facing_reg); quad_offset_reg = spe_allocate_available_register(f); fbRGBA_reg = spe_allocate_available_register(f); @@ -1195,6 +1841,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(TILE_SIZE == 32); + spe_comment(f, 0, "Computing tile location in memory"); spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ @@ -1205,124 +1852,164 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, y2_reg); } - if (dsa->alpha.enabled) { gen_alpha_test(dsa, f, mask_reg, fragA_reg); } + /* If we need the stencil buffers (because one- or two-sided stencil is + * enabled) or the depth buffer (because the depth test is enabled), + * go grab them. Note that if either one- or two-sided stencil is + * enabled, dsa->stencil[0].enabled will be true. + */ if (dsa->depth.enabled || dsa->stencil[0].enabled) { const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; boolean write_depth_stencil; - int fbZ_reg = spe_allocate_available_register(f); /* Z values */ - int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ + /* We may or may not need to allocate a register for Z or stencil values */ + boolean fbS_reg_set = false, fbZ_reg_set = false; + unsigned int fbS_reg, fbZ_reg = 0; + + spe_comment(f, 0, "Loading Z/stencil tile"); /* fetch quad of depth/stencil values from tile at (x,y) */ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + /* XXX Not sure this is allowed if we've only got a 16-bit Z buffer... */ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - if (dsa->depth.enabled) { - /* Extract Z bits from fbZS_reg into fbZ_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - int mask_reg = spe_allocate_available_register(f); - spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ - spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ - spe_release_register(f, mask_reg); - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_rotmi(f, fbZ_reg, fbZS_reg, -8); /* fbZ = fbZS >> 8 */ - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZ_reg, fbZS_reg); - /* OK, fbZ_reg has four 32-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZ_reg, fbZS_reg); - /* OK, fbZ_reg has four 16-bit Z values now */ - } - else { - ASSERT(0); /* invalid format */ - } - - /* Convert fragZ values from float[4] to 16, 24 or 32-bit uint[4] */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM || - zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - /* fragZ = fragZ >> 8 */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - /* fragZ = fragZ >> 16 */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -16); - } - } - else { - /* no Z test, but set Z to zero so we don't OR-in garbage below */ - spe_load_uint(f, fbZ_reg, 0); /* XXX set to zero for now */ + /* From the Z/stencil buffer format, pull out the bits we need for + * Z and/or stencil. We'll also convert the incoming fragment Z + * value in fragZ_reg from a floating point value in [0.0..1.0] to + * an unsigned integer value with the appropriate resolution. + */ + switch(zs_format) { + + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: + if (dsa->depth.enabled) { + /* We need the Z part at least */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* four 24-bit Z values in the low-order bits */ + spe_and_uint(f, fbZ_reg, fbZS_reg, 0x00ffffff); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* four 8-bit Z values in the high-order bits */ + spe_rotmi(f, fbS_reg, fbZS_reg, -24); + } + break; + + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* shift by 8 to get the upper 24-bit values */ + spe_rotmi(f, fbS_reg, fbZS_reg, -8); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* 8-bit stencil in the low-order bits - mask them out */ + spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); + } + break; + + case PIPE_FORMAT_Z32_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 32-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + } + /* No stencil, so can't do anything there */ + break; + + case PIPE_FORMAT_Z16_UNORM: + if (dsa->depth.enabled) { + /* XXX Not sure this is correct, but it was here before, so we're + * going with it for now + */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 16-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); + } + /* No stencil */ + break; + + default: + ASSERT(0); /* invalid format */ } - + /* If stencil is enabled, use the stencil-specific code + * generator to generate both the stencil and depth (if needed) + * tests. Otherwise, if only depth is enabled, generate + * a quick depth test. The test generators themselves will + * report back whether the depth/stencil buffer has to be + * written back. + */ if (dsa->stencil[0].enabled) { - /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - /* XXX extract with a shift */ - ASSERT(0); - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* XXX extract with a mask */ - ASSERT(0); - } - } - else { - /* no stencil test, but set to zero so we don't OR-in garbage below */ - spe_load_uint(f, fbS_reg, 0); /* XXX set to zero for now */ - } + /* This will perform the stencil and depth tests, and update + * the mask_reg, fbZ_reg, and fbS_reg as required by the + * tests. + */ + ASSERT(fbS_reg_set); + ASSERT(fbZ_reg_set); + spe_comment(f, 0, "Perform stencil test"); - if (dsa->stencil[0].enabled) { - /* XXX this may involve depth testing too */ - // gen_stencil_test(dsa, f, ... ); - ASSERT(0); + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); } else if (dsa->depth.enabled) { int zmask_reg = spe_allocate_available_register(f); - gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_comment(f, 0, "Perform depth test"); + write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); spe_release_register(f, zmask_reg); } - - /* do we need to write Z and/or Stencil back into framebuffer? */ - write_depth_stencil = (dsa->depth.writemask | - dsa->stencil[0].write_mask | - dsa->stencil[1].write_mask); + else { + write_depth_stencil = false; + } if (write_depth_stencil) { /* Merge latest Z and Stencil values into fbZS_reg. * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. * fbS_reg has four 8-bit Z values in bits [7..0]. */ + spe_comment(f, 0, "Storing depth/stencil values"); if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + if (fbS_reg_set) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else { + spe_move(f, fbZS_reg, fbZ_reg); + } } else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || zs_format == PIPE_FORMAT_Z24X8_UNORM) { spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + if (fbS_reg_set) { + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } } else if (zs_format == PIPE_FORMAT_Z32_UNORM) { spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ @@ -1341,11 +2028,10 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); } - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); + release_optional_register(f, &fbZ_reg_set, fbZ_reg); + release_optional_register(f, &fbS_reg_set, fbS_reg); } - /* Get framebuffer quad/colors. We'll need these for blending, * color masking, and to obey the quad/pixel mask. * Load: fbRGBA_reg = memory[color_tile + quad_offset] @@ -1354,8 +2040,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) */ spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - if (blend->blend_enable) { + spe_comment(f, 0, "Perform blending"); gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } @@ -1369,19 +2055,21 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int rgba_reg = spe_allocate_available_register(f); /* Pack four float colors as four 32-bit int colors */ + spe_comment(f, 0, "Convert fragment colors to framebuffer colors"); gen_pack_colors(f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, rgba_reg); if (blend->logicop_enable) { + spe_comment(f, 0, "Compute logic op"); gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } if (blend->colormask != PIPE_MASK_RGBA) { + spe_comment(f, 0, "Compute color mask"); gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); } - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: * if (mask[i]) * rgba[i] = rgba[i]; @@ -1393,6 +2081,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) /* Store updated quad in tile: * memory[color_tile + quad_offset] = rgba_reg; */ + spe_comment(f, 0, "Store framebuffer colors"); spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); spe_release_register(f, rgba_reg); diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index dd25ae880e..79cb8df82f 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell) struct cell_command_render *render = &cell_global.command[i].render; render->prim_type = PIPE_PRIM_TRIANGLES; render->num_verts = cell->prim_buffer.num_verts; + render->front_winding = cell->rasterizer->front_winding; render->vertex_size = cell->vertex_info->size * 4; render->xmin = cell->prim_buffer.xmin; render->ymin = cell->prim_buffer.ymin; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b93..578ddf62dc 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -214,6 +214,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; + render->front_winding = cell->rasterizer->front_winding; render->num_indexes = nr_indices; render->min_index = min_index; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 29a305232e..1cd577c23c 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -73,7 +73,8 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask); + vector unsigned int mask, + uint facing); /** Function for running fragment program */ typedef void (*spu_fragment_program_func)(vector float *inputs, diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index f107764fb2..d252fa6dc1 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -57,7 +57,8 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragG, vector float fragB, vector float fragA, - vector unsigned int mask) + vector unsigned int mask, + uint facing) { vector float frag_aos[4]; unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ @@ -433,23 +434,23 @@ spu_fallback_fragment_ops(uint x, uint y, /* Form bitmask depending on color buffer format and colormask bits */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - if (spu.blend.colormask & (1<<0)) + if (spu.blend.colormask & PIPE_MASK_R) cmask |= 0x00ff0000; /* red */ - if (spu.blend.colormask & (1<<1)) + if (spu.blend.colormask & PIPE_MASK_G) cmask |= 0x0000ff00; /* green */ - if (spu.blend.colormask & (1<<2)) + if (spu.blend.colormask & PIPE_MASK_B) cmask |= 0x000000ff; /* blue */ - if (spu.blend.colormask & (1<<3)) + if (spu.blend.colormask & PIPE_MASK_A) cmask |= 0xff000000; /* alpha */ break; case PIPE_FORMAT_B8G8R8A8_UNORM: - if (spu.blend.colormask & (1<<0)) + if (spu.blend.colormask & PIPE_MASK_R) cmask |= 0x0000ff00; /* red */ - if (spu.blend.colormask & (1<<1)) + if (spu.blend.colormask & PIPE_MASK_G) cmask |= 0x00ff0000; /* green */ - if (spu.blend.colormask & (1<<2)) + if (spu.blend.colormask & PIPE_MASK_B) cmask |= 0xff000000; /* blue */ - if (spu.blend.colormask & (1<<3)) + if (spu.blend.colormask & PIPE_MASK_A) cmask |= 0x000000ff; /* alpha */ break; default: diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index f817abf046..a61689c83a 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -38,7 +38,8 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask); + vector unsigned int mask, + uint facing); #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 305dc98881..82dbeb26b7 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - drawn += tri_draw(v0, v1, v2, tx, ty); + drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding); } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); @@ -297,5 +297,3 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf("SPU %u: RENDER done\n", spu.init.id); } - - diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 0a8fb56a62..6039cd80b2 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -118,6 +118,8 @@ struct setup_stage { float oneoverarea; + uint facing; + uint tx, ty; int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; @@ -274,7 +276,7 @@ eval_z(float x, float y) * overall. */ static INLINE void -emit_quad( int x, int y, mask_t mask ) +emit_quad( int x, int y, mask_t mask) { /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { @@ -344,7 +346,8 @@ emit_quad( int x, int y, mask_t mask ) fragZ, soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], - mask); + mask, + setup.facing); } } @@ -379,7 +382,8 @@ emit_quad( int x, int y, mask_t mask ) outputs[0*4+1], outputs[0*4+2], outputs[0*4+3], - mask); + mask, + setup.facing); } } } @@ -483,7 +487,7 @@ static void flush_spans( void ) */ for (x = block(minleft); x <= block(maxright); x += 2) { #if 1 - emit_quad( x, setup.span.y, calculate_mask( x ) ); + emit_quad( x, setup.span.y, calculate_mask( x )); #endif } @@ -902,13 +906,28 @@ static void subtriangle( struct edge *eleft, eright->sy += lines; } +static float +determinant( const float *v0, + const float *v1, + const float *v2 ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + /** * Draw triangle into tile at (tx, ty) (tile coords) * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding) { setup.tx = tx; setup.ty = ty; @@ -919,6 +938,12 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) setup.cliprect_maxx = (tx + 1) * TILE_SIZE; setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + /* Before we sort vertices, determine the facing of the triangle, + * which will be needed for front/back-face stencil application + */ + float det = determinant(v0, v1, v2); + setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW); + if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, (struct vertex_header *) v2)) { diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index aa694dd7c9..abc3d35160 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding); #endif /* SPU_TRI_H */ -- cgit v1.2.3 From ddeec1ed10d6c12403fe8d30c072ea68f044db99 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 13:55:18 -0600 Subject: cell: simplify spu debug code --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_context.c | 1 + src/gallium/drivers/cell/spu/spu_command.c | 47 +++++++++++++---------------- src/gallium/drivers/cell/spu/spu_debug.h | 9 ------ src/gallium/drivers/cell/spu/spu_main.c | 9 +----- src/gallium/drivers/cell/spu/spu_main.h | 15 +++++++-- src/gallium/drivers/cell/spu/spu_render.c | 7 +++-- 7 files changed, 41 insertions(+), 48 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 3b5a25e165..8ae78265f2 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -111,6 +111,7 @@ #define CELL_DEBUG_SYNC (1 << 2) #define CELL_DEBUG_FRAGMENT_OPS (1 << 3) #define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) +#define CELL_DEBUG_CMD (1 << 5) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b66aa9c9d9..f8d5eef3ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -93,6 +93,7 @@ static const struct debug_named_value cell_debug_flags[] = { {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index b521c3aecf..ebbed3d1dc 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -44,7 +44,6 @@ #include "spu_tile.h" #include "spu_vertex_shader.h" #include "spu_dcache.h" -#include "spu_debug.h" #include "cell/common.h" @@ -97,7 +96,7 @@ release_buffer(uint buffer) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; @@ -165,14 +164,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #endif /* CLEAR_OPT */ - DEBUG_PRINTF("CLEAR SURF done\n"); + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); } static void cmd_release_verts(const struct cell_command_release_verts *release) { - DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); + D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); ASSERT(release->vertex_buf != ~0U); release_buffer(release->vertex_buf); } @@ -189,7 +188,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { static int warned = 0; - DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ @@ -229,7 +228,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) { - DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_program_code, fp->code, SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -247,11 +246,11 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) const float *constants = (const float *) &buffer[pos + 2]; uint i; - DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); /* Expand each float to float[4] for SOA execution */ for (i = 0; i < num_const; i++) { - DEBUG_PRINTF(" const[%u] = %f\n", i, constants[i]); + D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); spu.constants[i] = spu_splats(constants[i]); } @@ -263,7 +262,7 @@ cmd_state_fs_constants(const uint64_t *buffer, uint pos) static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { - DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", cmd->width, cmd->height, cmd->color_start, @@ -352,7 +351,7 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) { uint unit = sampler->unit; - DEBUG_PRINTF("SAMPLER [%u]\n", unit); + D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); spu.sampler[unit] = sampler->state; @@ -404,9 +403,7 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint unit = texture->unit; uint i; - //if (spu.init.id==0) Debug=1; - - DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit); + D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); spu.texture[unit].max_level = 0; spu.texture[unit].target = texture->target; @@ -416,7 +413,7 @@ cmd_state_texture(const struct cell_command_texture *texture) uint height = texture->height[i]; uint depth = texture->depth[i]; - DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i, + D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, texture->start[i], texture->width[i], texture->height[i]); spu.texture[unit].level[i].start = texture->start[i]; @@ -438,15 +435,13 @@ cmd_state_texture(const struct cell_command_texture *texture) } update_tex_masks(&spu.texture[unit], &spu.sampler[unit], unit); - - //Debug=0; } static void cmd_state_vertex_info(const struct vertex_info *vinfo) { - DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); ASSERT(vinfo->num_attribs >= 1); ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -485,7 +480,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) static void cmd_finish(void) { - DEBUG_PRINTF("FINISH\n"); + D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); @@ -510,7 +505,7 @@ cmd_batch(uint opcode) const unsigned usize = size / sizeof(buffer[0]); uint pos; - DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -530,7 +525,7 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - DEBUG_PRINTF("release batch buf %u\n", buf); + D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); release_buffer(buf); /* @@ -663,7 +658,7 @@ cmd_batch(uint opcode) } } - DEBUG_PRINTF("BATCH complete\n"); + D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); } @@ -677,7 +672,7 @@ command_loop(void) struct cell_command cmd; int exitFlag = 0; - DEBUG_PRINTF("Enter command loop\n"); + D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); ASSERT((sizeof(struct cell_command) & 0xf) == 0); ASSERT_ALIGN16(&cmd); @@ -686,12 +681,12 @@ command_loop(void) unsigned opcode; int tag = 0; - DEBUG_PRINTF("Wait for cmd...\n"); + D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - DEBUG_PRINTF("got cmd 0x%x\n", opcode); + D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); /* command payload */ mfc_get(&cmd, /* dest */ @@ -708,7 +703,7 @@ command_loop(void) switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: - DEBUG_PRINTF("EXIT\n"); + D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: @@ -725,7 +720,7 @@ command_loop(void) } - DEBUG_PRINTF("Exit command loop\n"); + D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); spu_dcache_report(); } diff --git a/src/gallium/drivers/cell/spu/spu_debug.h b/src/gallium/drivers/cell/spu/spu_debug.h index eeec052655..25653dcdcd 100644 --- a/src/gallium/drivers/cell/spu/spu_debug.h +++ b/src/gallium/drivers/cell/spu/spu_debug.h @@ -30,28 +30,19 @@ #define SPU_DEBUG_H -/* Set to 0 to disable all extraneous debugging code */ -#define DEBUG 1 - #if DEBUG -extern boolean Debug; -extern boolean force_fragment_ops_fallback; /* These debug macros use the unusual construction ", ##__VA_ARGS__" * which expands to the expected comma + args if variadic arguments * are supplied, but swallows the comma if there are no variadic * arguments (which avoids syntax errors that would otherwise occur). */ -#define DEBUG_PRINTF(format,...) \ - if (Debug) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) #define D_PRINTF(flag, format,...) \ if (spu.init.debug_flags & (flag)) \ printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) #else -#define DEBUG_PRINTF(...) #define D_PRINTF(...) #endif diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 4becd0f92a..c8bb251905 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -40,7 +40,6 @@ #include "spu_per_fragment_op.h" #include "spu_texture.h" //#include "spu_test.h" -#include "spu_debug.h" #include "cell/common.h" @@ -53,12 +52,6 @@ helpful headers: struct spu_global spu; -#if DEBUG -boolean Debug = FALSE; -boolean force_fragment_ops_fallback = TRUE; -#endif - - static void one_time_init(void) { @@ -102,7 +95,7 @@ main(main_param_t speid, main_param_t argp) one_time_init(); - DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); /* get initialization data */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index ca72baea8b..569b9e45d4 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -36,6 +36,19 @@ #include "pipe/p_state.h" +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define D_PRINTF(flag, format,...) \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#else +#define D_PRINTF(...) +#endif + #define MAX_WIDTH 1024 #define MAX_HEIGHT 1024 @@ -187,8 +200,6 @@ struct spu_global extern struct spu_global spu; -extern boolean Debug; - diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 82dbeb26b7..cfff19b6c0 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -177,7 +177,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) uint i, j; - if (Debug) { +#if 0 printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " "inline_vert=%u\n", spu.init.id, @@ -190,7 +190,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf(" bound: %g, %g .. %g, %g\n", render->xmin, render->ymin, render->xmax, render->ymax); */ - } +#endif ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); @@ -293,7 +293,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - if (Debug) +#if 0 printf("SPU %u: RENDER done\n", spu.init.id); +#endif } -- cgit v1.2.3 From ec7d6c656178babdf143faa242f7a3df9d0bc22c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 14:39:16 -0600 Subject: cell: send rasterizer state to SPUs in proper way, remove front_winding hack --- src/gallium/drivers/cell/common.h | 18 ++++++++++++++---- src/gallium/drivers/cell/ppu/cell_state_emit.c | 7 +++++++ src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 - src/gallium/drivers/cell/spu/spu_command.c | 8 ++++++++ src/gallium/drivers/cell/spu/spu_main.h | 1 + src/gallium/drivers/cell/spu/spu_render.c | 2 +- src/gallium/drivers/cell/spu/spu_tri.c | 4 ++-- src/gallium/drivers/cell/spu/spu_tri.h | 2 +- 8 files changed, 34 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 1f6f2d494b..0ff2c491fb 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -99,8 +99,9 @@ #define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 #define CELL_CMD_STATE_FS_CONSTANTS 21 -#define CELL_CMD_VS_EXECUTE 22 -#define CELL_CMD_FLUSH_BUFFER_RANGE 23 +#define CELL_CMD_STATE_RASTERIZER 22 +#define CELL_CMD_VS_EXECUTE 23 +#define CELL_CMD_FLUSH_BUFFER_RANGE 24 /** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 @@ -156,13 +157,23 @@ struct cell_command_fragment_program */ struct cell_command_framebuffer { - uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + uint64_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ int width, height; void *color_start, *depth_start; enum pipe_format color_format, depth_format; }; +/** + * Tell SPUs about rasterizer state. + */ +struct cell_command_rasterizer +{ + uint64_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ + struct pipe_rasterizer_state rasterizer; +}; + + /** * Clear framebuffer to the given value/color. */ @@ -238,7 +249,6 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; - uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */ }; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index d2427584ba..e6387382f2 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -147,6 +147,13 @@ cell_emit_state(struct cell_context *cell) #endif } + if (cell->dirty & (CELL_NEW_RASTERIZER)) { + struct cell_command_rasterizer *rast = + cell_batch_alloc(cell, sizeof(*rast)); + rast->opcode = CELL_CMD_STATE_RASTERIZER; + rast->rasterizer = *cell->rasterizer; + } + if (cell->dirty & (CELL_NEW_FS)) { /* Send new fragment program to SPUs */ struct cell_command_fragment_program *fp diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index 578ddf62dc..aa63435b93 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -214,7 +214,6 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; - render->front_winding = cell->rasterizer->front_winding; render->num_indexes = nr_indices; render->min_index = min_index; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 4febd5385b..d2c282a022 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -583,6 +583,14 @@ cmd_batch(uint opcode) case CELL_CMD_STATE_FS_CONSTANTS: pos = cmd_state_fs_constants(buffer, pos); break; + case CELL_CMD_STATE_RASTERIZER: + { + struct cell_command_rasterizer *rast = + (struct cell_command_rasterizer *) &buffer[pos]; + spu.rasterizer = rast->rasterizer; + pos += sizeof(*rast) / 8; + } + break; case CELL_CMD_STATE_SAMPLER: { struct cell_command_sampler *sampler diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index f87495b72d..4099e52699 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -153,6 +153,7 @@ struct spu_global struct pipe_blend_state blend; struct pipe_blend_color blend_color; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct pipe_rasterizer_state rasterizer; struct spu_texture texture[PIPE_MAX_SAMPLERS]; struct vertex_info vertex_info; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index cfff19b6c0..75a7f75abc 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding); + drawn += tri_draw(v0, v1, v2, tx, ty); } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 2417db8960..1519b8cd7e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -775,7 +775,7 @@ determinant(const float *v0, const float *v1, const float *v2) */ boolean tri_draw(const float *v0, const float *v1, const float *v2, - uint tx, uint ty, uint front_winding) + uint tx, uint ty) { setup.tx = tx; setup.ty = ty; @@ -790,7 +790,7 @@ tri_draw(const float *v0, const float *v1, const float *v2, * which will be needed for front/back-face stencil application */ float det = determinant(v0, v1, v2); - setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW); + setup.facing = (det > 0.0) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index abc3d35160..aa694dd7c9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding); +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); #endif /* SPU_TRI_H */ -- cgit v1.2.3 From 1c915b14a545ffb10cc1c98cc69f997b6471617f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 19:40:51 -0600 Subject: cell: updated debug code --- src/gallium/drivers/cell/spu/spu_render.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 75a7f75abc..802455bf79 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -176,21 +176,12 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) const ushort *indexes; uint i, j; - -#if 0 - printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " - "inline_vert=%u\n", - spu.init.id, - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts); - - /* - printf(" bound: %g, %g .. %g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - */ -#endif + D_PRINTF(CELL_DEBUG_CMD, + "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); @@ -293,8 +284,5 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } -#if 0 - printf("SPU %u: RENDER done\n", - spu.init.id); -#endif + D_PRINTF(CELL_DEBUG_CMD, "RENDER done\n"); } -- cgit v1.2.3 From 0116ee1d1c341726b6ed23c2dddc4515e8a34385 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 20:46:43 -0600 Subject: cell: start some performance measurements Use the spu_write_decrementer() and spu_read_decrementer() functions to measure time. Convert to milliseconds according to the system timebase value. --- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_spu.c | 31 ++++++++++++++++++++++++++++++ src/gallium/drivers/cell/spu/spu_command.c | 15 +++++++++++++++ src/gallium/drivers/cell/spu/spu_render.c | 9 ++++++++- 4 files changed, 55 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 0ff2c491fb..469d56cda8 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -299,6 +299,7 @@ struct cell_init_info unsigned id; unsigned num_spus; unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ + float inv_timebase; /**< 1.0/timebase, for perf measurement */ /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index a6e268b362..28e5e6d706 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -52,6 +52,35 @@ helpful headers: struct cell_global_info cell_global; +/** + * Scan /proc/cpuinfo to determine the timebase for the system. + * This is used by the SPUs to convert 'decrementer' ticks to seconds. + * There may be a better way to get this value... + */ +static unsigned +get_timebase(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + unsigned timebase; + + assert(f); + while (!feof(f)) { + char line[80]; + fgets(line, sizeof(line), f); + if (strncmp(line, "timebase", 8) == 0) { + char *colon = strchr(line, ':'); + if (colon) { + timebase = atoi(colon + 2); + break; + } + } + } + fclose(f); + + return timebase; +} + + /** * Write a 1-word message to the given SPE mailbox. */ @@ -115,6 +144,7 @@ cell_start_spus(struct cell_context *cell) { static boolean one_time_init = FALSE; uint i, j; + uint timebase = get_timebase(); if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " @@ -138,6 +168,7 @@ cell_start_spus(struct cell_context *cell) cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; + cell_global.inits[i].inv_timebase = 1000.0f / timebase; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d2c282a022..57d265fef7 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -670,6 +670,8 @@ cmd_batch(uint opcode) } +#define PERF 0 + /** * Main loop for SPEs: Get a command, execute it, repeat. @@ -678,6 +680,7 @@ void command_loop(void) { int exitFlag = 0; + uint t0, t1; D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); @@ -686,10 +689,16 @@ command_loop(void) D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + if (PERF) + spu_write_decrementer(~0); + /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + if (PERF) + t0 = spu_read_decrementer(); + switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); @@ -707,6 +716,12 @@ command_loop(void) printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); } + if (PERF) { + t1 = spu_read_decrementer(); + printf("wait mbox time: %gms batch time: %gms\n", + (~0u - t0) * spu.init.inv_timebase, + (t0 - t1) * spu.init.inv_timebase); + } } D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 802455bf79..5515bb55c9 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -175,6 +175,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) const ubyte *vertices; const ushort *indexes; uint i, j; + uint num_tiles; D_PRINTF(CELL_DEBUG_CMD, "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", @@ -242,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + num_tiles = 0; + /** ** loop over tiles, rendering tris **/ @@ -255,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; + num_tiles++; + spu.cur_ctile_status = spu.ctile_status[ty][tx]; spu.cur_ztile_status = spu.ztile_status[ty][tx]; @@ -284,5 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - D_PRINTF(CELL_DEBUG_CMD, "RENDER done\n"); + D_PRINTF(CELL_DEBUG_CMD, + "RENDER done (%u tiles hit)\n", + num_tiles); } -- cgit v1.2.3 From 711f8a1dd94e2e1e715615d947e03015ef972326 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Thu, 30 Oct 2008 15:24:23 -0600 Subject: CELL: stencil bug fixes Two definitive bugs in stenciling were fixed. The first, reversed registers in the generated Select Bytes (selb) instruction, caused the stenciling INCR and DECR operations to fail dramatically, putting new values in where old values were supposed to be and vice versa. The second caused stencil tiles to not be read and written from main memory by the SPUs. A per-spu flag, spu.read_depth, was used to indicate whether the SPU should be reading depth tiles, and was set only when depth was enabled. A second flag, spu.read_stencil, was set when stenciling was enabled, but never referenced. As stenciling and depth are in the same tiles on the Cell, and there is no corresponding TAG_WRITE_TILE_STENCIL to complement TAG_WRITE_TILE_COLOR and TAG_WRITE_TILE_Z, I fixed this by eliminating the unused "spu.read_stencil", renaming "spu.read_depth" to "spu.read_depth_stencil", and setting it if either stenciling or depth is enabled. I also added an optimization to the fragment ops generation code, that avoids calculating stencil values and/or stencil writemask when the stencil operations are all KEEP. --- progs/trivial/tri-stencil.c | 13 ++++++++++-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 25 ++++++++++++++++++------ src/gallium/drivers/cell/spu/spu_command.c | 3 +-- src/gallium/drivers/cell/spu/spu_main.h | 3 +-- src/gallium/drivers/cell/spu/spu_render.c | 4 ++-- src/gallium/drivers/cell/spu/spu_tri.c | 2 +- 6 files changed, 35 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/cell/spu/spu_render.c') diff --git a/progs/trivial/tri-stencil.c b/progs/trivial/tri-stencil.c index 5edbef26ce..7686e16aef 100644 --- a/progs/trivial/tri-stencil.c +++ b/progs/trivial/tri-stencil.c @@ -49,7 +49,15 @@ static void Key(unsigned char key, int x, int y) switch (key) { case 27: + printf("Exiting...\n"); exit(1); + case 'r': + printf("Redisplaying...\n"); + glutPostRedisplay(); + break; + default: + printf("No such key '%c'...\n", key); + break; } } @@ -89,7 +97,7 @@ static void Draw(void) glEnd(); #endif -#if 0 +#if 1 glStencilFunc(GL_EQUAL, 1, 1); glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); @@ -130,7 +138,8 @@ int main(int argc, char **argv) exit(1); } - glutInitWindowPosition(0, 0); glutInitWindowSize( 300, 300); + glutInitWindowPosition(0, 0); + glutInitWindowSize( 300, 300); type = GLUT_RGB | GLUT_SINGLE | GLUT_DEPTH | GLUT_STENCIL; glutInitDisplayMode(type); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 4e1e53ecdc..8e4dd82404 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1282,7 +1282,7 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op, /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ spe_ai(f, newS_reg, fbS_reg, 1); /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); spe_release_register(f, equals_reg); break; @@ -1295,7 +1295,7 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op, /* Add Word Immediate with a (-1) value works */ spe_ai(f, newS_reg, fbS_reg, -1); /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); spe_release_register(f, equals_reg); break; @@ -1534,15 +1534,28 @@ gen_stencil_depth_test(struct spe_function *f, * meaning that we have to calculate the stencil values but do not * need to mask them), we can avoid generating code. Don't forget * that we need to consider backfacing stencil, if enabled. + * + * Note that if the backface stencil is *not* enabled, the backface + * stencil will have the same values as the frontface stencil. */ - if (dsa->stencil[0].write_mask == 0x0 && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { - /* Trivial: don't need to calculate stencil values, and don't need to - * write them back to the framebuffer. + if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP && + dsa->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP && + dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP && + dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP && + dsa->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP && + dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { + /* No changes to any stencil values */ + need_to_calculate_stencil_values = false; + need_to_writemask_stencil_values = false; + } + else if (dsa->stencil[0].write_mask == 0x0 && dsa->stencil[1].write_mask == 0x0) { + /* All changes are writemasked out, so no need to calculate + * what those changes might be, and no need to write anything back. */ need_to_calculate_stencil_values = false; need_to_writemask_stencil_values = false; } - else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0xff)) { + else if (dsa->stencil[0].write_mask == 0xff && dsa->stencil[1].write_mask == 0xff) { /* Still trivial, but a little less so. We need to write the stencil * values, but we don't need to mask them. */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 63818d4c46..d726622d94 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -244,8 +244,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) } } - spu.read_depth = spu.depth_stencil_alpha.depth.enabled; - spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; + spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 668af10be2..692790c9f3 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -160,8 +160,7 @@ struct spu_global tile_t ztile ALIGN16_ATTRIB; /** Read depth/stencil tiles? */ - boolean read_depth; - boolean read_stencil; + boolean read_depth_stencil; /** Current tiles' status */ ubyte cur_ctile_status, cur_ztile_status; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5515bb55c9..7c225e2f27 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -98,7 +98,7 @@ my_tile(uint tx, uint ty) static INLINE void get_cz_tiles(uint tx, uint ty) { - if (spu.read_depth) { + if (spu.read_depth_stencil) { if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); @@ -153,7 +153,7 @@ static INLINE void wait_put_cz_tiles(void) { wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.read_depth) { + if (spu.read_depth_stencil) { wait_on_mask(1 << TAG_WRITE_TILE_Z); } } diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 4caf7d6b61..5f908159bb 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -369,7 +369,7 @@ flush_spans(void) } ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - if (spu.read_depth) { + if (spu.read_depth_stencil) { if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ztile\n", spu.init.id); -- cgit v1.2.3