/************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * (C) Copyright IBM Corporation 2008 * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ /* * Authors: * Keith Whitwell * Ian Romanick */ #include #include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_exec.h" #include "spu_vertex_shader.h" #include "spu_main.h" #define CACHE_NAME attribute #define CACHED_TYPE qword #define CACHE_TYPE CACHE_TYPE_RO #define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 #include /* Yes folks, this is ugly. */ #undef CACHE_NWAY #undef CACHE_NSETS #define CACHE_NAME attribute #define CACHE_NWAY 4 #define CACHE_NSETS (1U << 6) #define DRAW_DBG 0 typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { /* Shuffle used by CVT_64_FLOAT */ { 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, }, /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED */ { 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, }, /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED */ { 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, }, /* High value shuffle used by trans4x4. */ { 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 }, /* Low value shuffle used by trans4x4. */ { 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F } }; /** * Fetch between 1 and 32 bytes from an unaligned address */ static INLINE void fetch_unaligned(qword *dst, unsigned ea, unsigned size) { qword tmp[4] ALIGN16_ATTRIB; const int shift = ea & 0x0f; const unsigned aligned_start_ea = ea & ~0x0f; const unsigned aligned_end_ea = (ea + size) & ~0x0f; const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1; unsigned i; if (shift == 0) { /* Data is already aligned. Fetch directly into the destination buffer. */ for (i = 0; i < num_entries; i++) { dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); } } else { /* Fetch data from the cache to the local buffer. */ for (i = 0; i < num_entries; i++) { tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16)); } /* Fix the alignment of the data and write to the destination buffer. */ for (i = 0; i < ((size + 15) / 16); i++) { dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift), (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16)); } } } /** * Fetch vertex attributes for 'count' vertices. */ static void generic_vertex_fetch(struct spu_vs_context *draw, struct spu_exec_machine *machine, const unsigned *elts, unsigned count) { unsigned nr_attrs = draw->vertex_fetch.nr_attrs; unsigned attr; assert(count <= 4); #if DRAW_DBG printf("SPU: %s count = %u, nr_attrs = %u\n", __FUNCTION__, count, nr_attrs); #endif /* loop over vertex attributes (vertex shader inputs) */ for (attr = 0; attr < nr_attrs; attr++) { const unsigned pitch = draw->vertex_fetch.pitch[attr]; const uint64_t src = draw->vertex_fetch.src_ptr[attr]; const spu_fetch_func fetch = (spu_fetch_func) (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); unsigned i; unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; qword in[2 * 4] ALIGN16_ATTRIB; /* Fetch four attributes for four vertices. */ idx = 0; for (i = 0; i < count; i++) { const uint64_t addr = src + (elts[i] * pitch); #if DRAW_DBG printf("SPU: fetching = 0x%llx\n", addr); #endif fetch_unaligned(& in[idx], addr, bytes_per_entry); idx += quads_per_entry; } /* Be nice and zero out any missing vertices. */ (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); /* Convert all 4 vertices to vectors of float. */ (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); } } void spu_update_vertex_fetch( struct spu_vs_context *draw ) { unsigned i; /* Invalidate the vertex cache. */ for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { CACHELINE_CLEARVALID(i); } draw->vertex_fetch.fetch_func = generic_vertex_fetch; }