diff options
author | Keith Whitwell <keith@tungstengraphics.com> | 2008-09-11 16:05:15 +0100 |
---|---|---|
committer | Keith Whitwell <keith@tungstengraphics.com> | 2008-09-11 16:05:15 +0100 |
commit | 0397b2bb41b0f337af2949a15bcd7d0e7e8a7dc1 (patch) | |
tree | 732c93d74ed806942a13bf15575a61aa4f001253 /src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | |
parent | 80af50b35b5a4e8890e15b28940576f8a1ac1476 (diff) | |
parent | e6887a5752774c18cf527477fdd3e57e4893ff3b (diff) |
Merge branch 'gallium-0.1' into gallium-0.2
A first attempt at moving gallium onto a branch directly off master...
It will be interesting to see how much work this takes to get running.
Have resolved the conflicts semi-arbitarily, not compiled or tested.
Conflicts:
.gitignore
Makefile
configs/config.mgw
configs/darwin
configs/darwin-x86ppc
configs/default
configs/freebsd-dri
configs/linux-dri
configs/linux-dri-xcb
configs/linux-fbdev
configs/linux-static
configs/linux-x86-64-static
configs/linux-x86-static
doxygen/Makefile
include/GL/gl.h
progs/demos/Makefile
progs/demos/descrip.mms
progs/demos/texenv.c
progs/egl/.gitignore
progs/egl/Makefile
progs/glsl/.gitignore
progs/glsl/Makefile
progs/glsl/convolutions.c
progs/samples/Makefile.mgw
progs/tests/.gitignore
progs/trivial/.gitignore
progs/trivial/point-param.c
progs/trivial/tri.c
progs/xdemos/.gitignore
progs/xdemos/glthreads.c
src/egl/drivers/demo/Makefile
src/egl/drivers/dri/Makefile
src/egl/main/Makefile
src/glu/Makefile
src/glu/sgi/Makefile
src/glu/sgi/Makefile.mgw
src/glut/glx/Makefile.mgw
src/glut/os2/WarpWin.cpp
src/glut/os2/glut_cindex.cpp
src/glut/os2/glut_gamemode.cpp
src/glut/os2/glut_win.cpp
src/glut/os2/glut_winmisc.cpp
src/glut/os2/os2_glx.cpp
src/glut/os2/os2_menu.cpp
src/glut/os2/os2_winproc.cpp
src/glw/Makefile
src/glx/x11/dri_glx.c
src/glx/x11/glxext.c
src/mesa/Makefile
src/mesa/Makefile.mgw
src/mesa/descrip.mms
src/mesa/drivers/beos/Makefile
src/mesa/drivers/common/descrip.mms
src/mesa/drivers/common/driverfuncs.c
src/mesa/drivers/directfb/Makefile
src/mesa/drivers/dri/Makefile.template
src/mesa/drivers/dri/common/dri_bufmgr.c
src/mesa/drivers/dri/common/dri_bufmgr.h
src/mesa/drivers/dri/common/dri_util.c
src/mesa/drivers/dri/common/extension_helper.h
src/mesa/drivers/dri/common/mmio.h
src/mesa/drivers/dri/common/utils.c
src/mesa/drivers/dri/common/utils.h
src/mesa/drivers/dri/glcore/Makefile
src/mesa/drivers/dri/i810/i810screen.c
src/mesa/drivers/dri/i915/intel_ioctl.c
src/mesa/drivers/dri/i915/intel_ioctl.h
src/mesa/drivers/dri/i915/intel_screen.c
src/mesa/drivers/dri/i915/server/i830_common.h
src/mesa/drivers/dri/i915/server/i830_dri.h
src/mesa/drivers/dri/i965/intel_screen.c
src/mesa/drivers/dri/i965/server/i830_common.h
src/mesa/drivers/dri/i965/server/i830_dri.h
src/mesa/drivers/dri/mach64/mach64_screen.c
src/mesa/drivers/dri/nouveau/nouveau_context.h
src/mesa/drivers/dri/nouveau/nouveau_fifo.c
src/mesa/drivers/dri/nouveau/nouveau_fifo.h
src/mesa/drivers/dri/nouveau/nouveau_screen.c
src/mesa/drivers/dri/nouveau/nouveau_screen.h
src/mesa/drivers/dri/r128/r128_tex.h
src/mesa/drivers/dri/savage/savageioctl.h
src/mesa/drivers/fbdev/Makefile
src/mesa/drivers/osmesa/Makefile
src/mesa/drivers/osmesa/descrip.mms
src/mesa/drivers/x11/Makefile
src/mesa/drivers/x11/descrip.mms
src/mesa/drivers/x11/xm_dd.c
src/mesa/glapi/glapi.c
src/mesa/glapi/glthread.c
src/mesa/main/api_validate.c
src/mesa/main/attrib.c
src/mesa/main/bufferobj.c
src/mesa/main/bufferobj.h
src/mesa/main/buffers.c
src/mesa/main/config.h
src/mesa/main/context.c
src/mesa/main/descrip.mms
src/mesa/main/drawpix.c
src/mesa/main/enums.c
src/mesa/main/fbobject.c
src/mesa/main/glheader.h
src/mesa/main/imports.c
src/mesa/main/mipmap.c
src/mesa/main/mm.c
src/mesa/main/mm.h
src/mesa/main/mtypes.h
src/mesa/main/points.c
src/mesa/main/sources
src/mesa/main/state.c
src/mesa/main/texcompress_fxt1.c
src/mesa/main/texenvprogram.c
src/mesa/main/texobj.c
src/mesa/main/texstate.c
src/mesa/main/texstore.c
src/mesa/math/descrip.mms
src/mesa/shader/arbprogram.c
src/mesa/shader/descrip.mms
src/mesa/shader/prog_execute.c
src/mesa/shader/prog_statevars.c
src/mesa/shader/prog_statevars.h
src/mesa/shader/prog_uniform.c
src/mesa/shader/program.c
src/mesa/shader/program.h
src/mesa/shader/shader_api.c
src/mesa/shader/slang/descrip.mms
src/mesa/shader/slang/library/slang_vertex_builtin_gc.h
src/mesa/sources
src/mesa/swrast/descrip.mms
src/mesa/swrast/s_drawpix.c
src/mesa/swrast/s_fragprog.c
src/mesa/swrast/s_readpix.c
src/mesa/swrast/s_span.c
src/mesa/swrast_setup/descrip.mms
src/mesa/tnl/descrip.mms
src/mesa/tnl/t_context.h
src/mesa/tnl/t_vp_build.c
src/mesa/tnl/tnl.h
src/mesa/vbo/descrip.mms
src/mesa/vbo/vbo_context.c
src/mesa/vbo/vbo_exec_array.c
src/mesa/x86-64/xform4.S
src/mesa/x86/rtasm/x86sse.c
src/mesa/x86/rtasm/x86sse.h
windows/VC6/progs/glut/glut.dsp
windows/VC7/mesa/gdi/gdi.vcproj
windows/VC7/mesa/glu/glu.vcproj
windows/VC7/mesa/mesa.sln
windows/VC7/mesa/mesa/mesa.vcproj
windows/VC7/mesa/osmesa/osmesa.vcproj
windows/VC7/progs/glut/glut.vcproj
windows/VC8/mesa/gdi/gdi.vcproj
windows/VC8/mesa/glu/glu.vcproj
windows/VC8/mesa/mesa.sln
windows/VC8/mesa/mesa/mesa.vcproj
windows/VC8/progs/glut/glut.vcproj
Diffstat (limited to 'src/gallium/drivers/cell/ppu/cell_vertex_fetch.c')
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c new file mode 100644 index 0000000000..2ece0250f6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -0,0 +1,344 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" + +#include "cell_context.h" +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Emit a 4x4 matrix transpose operation + * + * \param p Function that the transpose operation is to be appended to + * \param row0 Register containing row 0 of the source matrix + * \param row1 Register containing row 1 of the source matrix + * \param row2 Register containing row 2 of the source matrix + * \param row3 Register containing row 3 of the source matrix + * \param dest_ptr Register containing the address of the destination matrix + * \param shuf_ptr Register containing the address of the shuffled data + * \param count Number of colums to actually be written to the destination + * + * \note + * This function assumes that the registers named by \c row0, \c row1, + * \c row2, and \c row3 are scratch and can be modified by the generated code. + * Furthermore, these registers will be released, via calls to + * \c release_register, by this function. + * + * \note + * This function requires that four temporary are available on entry. + */ +static void +emit_matrix_transpose(struct spe_function *p, + unsigned row0, unsigned row1, unsigned row2, + unsigned row3, unsigned dest_ptr, + unsigned shuf_ptr, unsigned count) +{ + int shuf_hi = spe_allocate_available_register(p); + int shuf_lo = spe_allocate_available_register(p); + int t1 = spe_allocate_available_register(p); + int t2 = spe_allocate_available_register(p); + int t3; + int t4; + int col0; + int col1; + int col2; + int col3; + + + spe_lqd(p, shuf_hi, shuf_ptr, 3); + spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_shufb(p, t1, row0, row2, shuf_hi); + spe_shufb(p, t2, row0, row2, shuf_lo); + + + /* row0 and row2 are now no longer needed. Re-use those registers as + * temporaries. + */ + t3 = row0; + t4 = row2; + + spe_shufb(p, t3, row1, row3, shuf_hi); + spe_shufb(p, t4, row1, row3, shuf_lo); + + + /* row1 and row3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col0 = row1; + col1 = row3; + + spe_shufb(p, col0, t1, t3, shuf_hi); + if (count > 1) { + spe_shufb(p, col1, t1, t3, shuf_lo); + } + + /* t1 and t3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col2 = t1; + col3 = t3; + + if (count > 2) { + spe_shufb(p, col2, t2, t4, shuf_hi); + } + + if (count > 3) { + spe_shufb(p, col3, t2, t4, shuf_lo); + } + + + /* Store the results. Remember that the stqd instruction is encoded using + * the qword offset (stand-alone assemblers to the byte-offset to + * qword-offset conversion for you), so the byte-offset needs be divided by + * 16. + */ + switch (count) { + case 4: + spe_stqd(p, col3, dest_ptr, 3); + case 3: + spe_stqd(p, col2, dest_ptr, 2); + case 2: + spe_stqd(p, col1, dest_ptr, 1); + case 1: + spe_stqd(p, col0, dest_ptr, 0); + } + + + /* Release all of the temporary registers used. + */ + spe_release_register(p, col0); + spe_release_register(p, col1); + spe_release_register(p, col2); + spe_release_register(p, col3); + spe_release_register(p, shuf_hi); + spe_release_register(p, shuf_lo); + spe_release_register(p, t2); + spe_release_register(p, t4); +} + + +static void +emit_fetch(struct spe_function *p, + unsigned in_ptr, unsigned *offset, + unsigned out_ptr, unsigned shuf_ptr, + enum pipe_format format) +{ + const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); + const unsigned type = pf_type(format); + const unsigned bytes = pf_size_x(format); + + int v0 = spe_allocate_available_register(p); + int v1 = spe_allocate_available_register(p); + int v2 = spe_allocate_available_register(p); + int v3 = spe_allocate_available_register(p); + int tmp = spe_allocate_available_register(p); + int float_zero = -1; + int float_one = -1; + float scale_signed = 0.0; + float scale_unsigned = 0.0; + + spe_lqd(p, v0, in_ptr, 0 + offset[0]); + spe_lqd(p, v1, in_ptr, 1 + offset[0]); + spe_lqd(p, v2, in_ptr, 2 + offset[0]); + spe_lqd(p, v3, in_ptr, 3 + offset[0]); + offset[0] += 4; + + switch (bytes) { + case 1: + scale_signed = 1.0f / 127.0f; + scale_unsigned = 1.0f / 255.0f; + spe_lqd(p, tmp, shuf_ptr, 1); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 2: + scale_signed = 1.0f / 32767.0f; + scale_unsigned = 1.0f / 65535.0f; + spe_lqd(p, tmp, shuf_ptr, 2); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 4: + scale_signed = 1.0f / 2147483647.0f; + scale_unsigned = 1.0f / 4294967295.0f; + break; + default: + assert(0); + break; + } + + switch (type) { + case PIPE_FORMAT_TYPE_FLOAT: + break; + case PIPE_FORMAT_TYPE_UNORM: + spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); + spe_cuflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_SNORM: + spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); + spe_csflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_USCALED: + spe_cuflt(p, v0, v0, 0); + break; + case PIPE_FORMAT_TYPE_SSCALED: + spe_csflt(p, v0, v0, 0); + break; + } + + + if (count < 4) { + float_one = spe_allocate_available_register(p); + spe_il(p, float_one, 1); + spe_cuflt(p, float_one, float_one, 0); + + if (count < 3) { + float_zero = spe_allocate_available_register(p); + spe_il(p, float_zero, 0); + } + } + + spe_release_register(p, tmp); + + emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + + switch (count) { + case 1: + spe_stqd(p, float_zero, out_ptr, 1); + case 2: + spe_stqd(p, float_zero, out_ptr, 2); + case 3: + spe_stqd(p, float_one, out_ptr, 3); + } + + if (float_zero != -1) { + spe_release_register(p, float_zero); + } + + if (float_one != -1) { + spe_release_register(p, float_one); + } +} + + +void cell_update_vertex_fetch(struct draw_context *draw) +{ +#if 0 + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct spe_function *p = &cell->attrib_fetch; + unsigned function_index[PIPE_MAX_ATTRIBS]; + unsigned unique_attr_formats; + int out_ptr; + int in_ptr; + int shuf_ptr; + unsigned i; + unsigned j; + + + /* Determine how many unique input attribute formats there are. At the + * same time, store the index of the lowest numbered attribute that has + * the same format as any non-unique format. + */ + unique_attr_formats = 1; + function_index[0] = 0; + for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; + + for (j = 0; j < i; j++) { + if (curr_fmt == draw->vertex_element[j].src_format) { + break; + } + } + + if (j == i) { + unique_attr_formats++; + } + + function_index[i] = j; + } + + + /* Each fetch function can be a maximum of 34 instructions (note: this is + * actually a slight over-estimate). That means (34 * 4) = 136 bytes + * each maximum. + */ + spe_init_func(p, 136 * unique_attr_formats); + + + /* Allocate registers for the function's input parameters. + */ + out_ptr = spe_allocate_register(p, 3); + in_ptr = spe_allocate_register(p, 4); + shuf_ptr = spe_allocate_register(p, 5); + + + /* Generate code for the individual attribute fetch functions. + */ + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + unsigned offset; + + if (function_index[i] == i) { + cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr + - (void *) p->store); + + offset = 0; + emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, + draw->vertex_element[i].src_format); + spe_bi(p, 0, 0, 0); + + /* Round up to the next 16-byte boundary. + */ + if ((((unsigned) p->store) & 0x0f) != 0) { + const unsigned align = ((unsigned) p->store) & 0x0f; + p->store = (uint32_t *) (((void *) p->store) + align); + } + } else { + /* Use the same function entry-point as a previously seen attribute + * with the same format. + */ + cell->attrib_fetch_offsets[i] = + cell->attrib_fetch_offsets[function_index[i]]; + } + } +#else + assert(0); +#endif +} |