From 397b81bd1c7984b1667af7ef954e053263a7a661 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 15 Feb 2008 09:43:13 -0800 Subject: Move cell_vertex_fetch.c for recent code reorg. --- src/mesa/pipe/cell/ppu/cell_vertex_fetch.c | 392 ----------------------------- 1 file changed, 392 deletions(-) delete mode 100644 src/mesa/pipe/cell/ppu/cell_vertex_fetch.c (limited to 'src/mesa') diff --git a/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c b/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index f2432f4ff5..0000000000 --- a/src/mesa/pipe/cell/ppu/cell_vertex_fetch.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" - -#include "pipe/cell/ppu/cell_context.h" -#include "ppc/rtasm/spe_asm.h" - -typedef uint64_t register_mask; - -int allocate_available_register(register_mask *m) -{ - unsigned i; - for (i = 0; i < 64; i++) { - const uint64_t mask = (1ULL << i); - - if ((m[0] & mask) != 0) { - m[0] &= ~mask; - return i; - } - } - - return -1; -} - - -int allocate_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) != 0); - - m[0] &= ~(1ULL << reg); - return reg; -} - - -void release_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) == 0); - - m[0] |= (1ULL << reg); -} - - -/** - * Emit a 4x4 matrix transpose operation - * - * \param p Function that the transpose operation is to be appended to - * \param m Live register mask - * \param row0 Register containing row 0 of the source matrix - * \param row1 Register containing row 1 of the source matrix - * \param row2 Register containing row 2 of the source matrix - * \param row3 Register containing row 3 of the source matrix - * \param dest_ptr Register containing the address of the destination matrix - * \param shuf_ptr Register containing the address of the shuffled data - * \param count Number of colums to actually be written to the destination - * - * \note - * This function assumes that the registers named by \c row0, \c row1, - * \c row2, and \c row3 are scratch and can be modified by the generated code. - * Furthermore, these registers will be released, via calls to - * \c release_register, by this function. - * - * \note - * This function requires that four temporary are available on entry. - */ -static void -emit_matrix_transpose(struct spe_function *p, register_mask *m, - unsigned row0, unsigned row1, unsigned row2, - unsigned row3, unsigned dest_ptr, - unsigned shuf_ptr, unsigned count) -{ - int shuf_hi = allocate_available_register(m); - int shuf_lo = allocate_available_register(m); - int t1 = allocate_available_register(m); - int t2 = allocate_available_register(m); - int t3; - int t4; - int col0; - int col1; - int col2; - int col3; - - - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); - spe_shufb(p, t1, row0, row2, shuf_hi); - spe_shufb(p, t2, row0, row2, shuf_lo); - - - /* row0 and row2 are now no longer needed. Re-use those registers as - * temporaries. - */ - t3 = row0; - t4 = row2; - - spe_shufb(p, t3, row1, row3, shuf_hi); - spe_shufb(p, t4, row1, row3, shuf_lo); - - - /* row1 and row3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col0 = row1; - col1 = row3; - - spe_shufb(p, col0, t1, t3, shuf_hi); - if (count > 1) { - spe_shufb(p, col1, t1, t3, shuf_lo); - } - - /* t1 and t3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col2 = t1; - col3 = t3; - - if (count > 2) { - spe_shufb(p, col2, t2, t4, shuf_hi); - } - - if (count > 3) { - spe_shufb(p, col3, t2, t4, shuf_lo); - } - - - /* Store the results. Remember that the stqd instruction is encoded using - * the qword offset (stand-alone assemblers to the byte-offset to - * qword-offset conversion for you), so the byte-offset needs be divided by - * 16. - */ - switch (count) { - case 4: - spe_stqd(p, col3, dest_ptr, 3); - case 3: - spe_stqd(p, col2, dest_ptr, 2); - case 2: - spe_stqd(p, col1, dest_ptr, 1); - case 1: - spe_stqd(p, col0, dest_ptr, 0); - } - - - /* Release all of the temporary registers used. - */ - release_register(m, col0); - release_register(m, col1); - release_register(m, col2); - release_register(m, col3); - release_register(m, shuf_hi); - release_register(m, shuf_lo); - release_register(m, t2); - release_register(m, t4); -} - - -static void -emit_fetch(struct spe_function *p, register_mask *m, - unsigned in_ptr, unsigned *offset, - unsigned out_ptr, unsigned shuf_ptr, - enum pipe_format format) -{ - const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); - const unsigned type = pf_type(format); - const unsigned bytes = pf_size_x(format); - - int v0 = allocate_available_register(m); - int v1 = allocate_available_register(m); - int v2 = allocate_available_register(m); - int v3 = allocate_available_register(m); - int tmp = allocate_available_register(m); - int float_zero = -1; - int float_one = -1; - float scale_signed = 0.0; - float scale_unsigned = 0.0; - - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); - offset[0] += 4; - - switch (bytes) { - case 1: - scale_signed = 1.0f / 127.0f; - scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 2: - scale_signed = 1.0f / 32767.0f; - scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 4: - scale_signed = 1.0f / 2147483647.0f; - scale_unsigned = 1.0f / 4294967295.0f; - break; - default: - assert(0); - break; - } - - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: - break; - case PIPE_FORMAT_TYPE_UNORM: - spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); - spe_cuflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_SNORM: - spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); - spe_csflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_USCALED: - spe_cuflt(p, v0, v0, 0); - break; - case PIPE_FORMAT_TYPE_SSCALED: - spe_csflt(p, v0, v0, 0); - break; - } - - - if (count < 4) { - float_one = allocate_available_register(m); - spe_il(p, float_one, 1); - spe_cuflt(p, float_one, float_one, 0); - - if (count < 3) { - float_zero = allocate_available_register(m); - spe_il(p, float_zero, 0); - } - } - - release_register(m, tmp); - - emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); - - switch (count) { - case 1: - spe_stqd(p, float_zero, out_ptr, 1); - case 2: - spe_stqd(p, float_zero, out_ptr, 2); - case 3: - spe_stqd(p, float_one, out_ptr, 3); - } - - if (float_zero != -1) { - release_register(m, float_zero); - } - - if (float_one != -1) { - release_register(m, float_one); - } -} - - -void cell_update_vertex_fetch(struct draw_context *draw) -{ - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - register_mask m = ~0; - struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_ATTRIB_MAX]; - unsigned unique_attr_formats; - int out_ptr; - int in_ptr; - int shuf_ptr; - unsigned i; - unsigned j; - - - /* Determine how many unique input attribute formats there are. At the - * same time, store the index of the lowest numbered attribute that has - * the same format as any non-unique format. - */ - unique_attr_formats = 1; - function_index[0] = 0; - for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; - - for (j = 0; j < i; j++) { - if (curr_fmt == draw->vertex_element[j].src_format) { - break; - } - } - - if (j == i) { - unique_attr_formats++; - } - - function_index[i] = j; - } - - - /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). That means (34 * 4) = 136 bytes - * each maximum. - */ - spe_init_func(p, 136 * unique_attr_formats); - - - /* Registers 0, 1, and 2 are reserved by the ABI. - */ - allocate_register(&m, 0); - allocate_register(&m, 1); - allocate_register(&m, 2); - - - /* Allocate registers for the function's input parameters. - */ - out_ptr = allocate_register(&m, 3); - in_ptr = allocate_register(&m, 4); - shuf_ptr = allocate_register(&m, 5); - - - /* Generate code for the individual attribute fetch functions. - */ - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - unsigned offset; - - if (function_index[i] == i) { - cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr - - (void *) p->store); - - offset = 0; - emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, - draw->vertex_element[i].src_format); - spe_bi(p, 0, 0, 0); - - /* Round up to the next 16-byte boundary. - */ - if ((((unsigned) p->store) & 0x0f) != 0) { - const unsigned align = ((unsigned) p->store) & 0x0f; - p->store = (uint32_t *) (((void *) p->store) + align); - } - } else { - /* Use the same function entry-point as a previously seen attribute - * with the same format. - */ - cell->attrib_fetch_offsets[i] = - cell->attrib_fetch_offsets[function_index[i]]; - } - } - - static first_time = 1; - if (first_time) { - first_time = 0; - const unsigned instructions = p->csr - p->store; - for (i = 0; i < instructions; i++) { - printf("\t.long\t0x%08x\n", p->store[i]); - } - } -} -- cgit v1.2.3