diff options
Diffstat (limited to 'src/gallium/drivers/cell/spu')
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_command.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_exec.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_exec.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_funcs.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.h | 47 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 36 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_render.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_vertex_shader.c | 15 |
9 files changed, 70 insertions, 52 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 12b855a3db..55bd85bde2 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; @@ -543,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a5..d2166a4901 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d; unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16) union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i; unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 8605679940..0ca92af248 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16) struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d25..98919c43ff 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51..a9d72f84d5 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,50 +93,64 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ enum pipe_format color_format; enum pipe_format depth_format; - uint width, height; /**< size in pixels */ - uint width_tiles, height_tiles; /**< width and height in tiles */ + uint width; /**< width in pixels */ + uint height; /**< height in pixels */ + uint width_tiles; /**< width in tiles */ + uint height_tiles; /**< width in tiles */ uint color_clear_value; uint depth_clear_value; uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; - ushort width, height, depth; + ushort width; + ushort height; + ushort depth; ushort tiles_per_row; uint bytes_per_image; /** texcoord scale factors */ - vector float scale_s, scale_t, scale_r; + vector float scale_s; + vector float scale_t; + vector float scale_r; /** texcoord masks (if REPEAT then size-1, else ~0) */ - vector signed int mask_s, mask_t, mask_r; + vector signed int mask_s; + vector signed int mask_t; + vector signed int mask_r; /** texcoord clamp limits */ - vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; + vector signed int max_s; + vector signed int max_t; + vector signed int max_r; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,18 +169,19 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) tile_t ctile; + PIPE_ALIGN_VAR(16) tile_t ztile; /** Read depth/stencil tiles? */ boolean read_depth_stencil; /** Current tiles' status */ - ubyte cur_ctile_status, cur_ztile_status; + ubyte cur_ctile_status; + ubyte cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; + PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +190,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +202,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index eba9f95cf1..5328374080 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -207,9 +207,9 @@ spu_fallback_fragment_ops(uint x, uint y, * If we'll need the current framebuffer/tile colors for blending * or logicop or colormask, fetch them now. */ - if (spu.blend.blend_enable || + if (spu.blend.rt[0].blend_enable || spu.blend.logicop_enable || - spu.blend.colormask != 0xf) { + spu.blend.rt[0].colormask != 0xf) { #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ fbc0 = colorTile->ui[y][x*2+0]; @@ -228,7 +228,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Do blending */ - if (spu.blend.blend_enable) { + if (spu.blend.rt[0].blend_enable) { /* blending terms, misc regs */ vector float term1r, term1g, term1b, term1a; vector float term2r, term2g, term2b, term2a; @@ -261,7 +261,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Src RGB terms (fragment color * factor) */ - switch (spu.blend.rgb_src_factor) { + switch (spu.blend.rt[0].rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: term1r = fragR; term1g = fragG; @@ -310,7 +310,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Src Alpha term (fragment alpha * factor) */ - switch (spu.blend.alpha_src_factor) { + switch (spu.blend.rt[0].alpha_src_factor) { case PIPE_BLENDFACTOR_ONE: term1a = fragA; break; @@ -338,7 +338,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Dest RGB terms (framebuffer color * factor) */ - switch (spu.blend.rgb_dst_factor) { + switch (spu.blend.rt[0].rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: term2r = fbRGBA[0]; term2g = fbRGBA[1]; @@ -394,7 +394,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Compute Dest Alpha term (framebuffer alpha * factor) */ - switch (spu.blend.alpha_dst_factor) { + switch (spu.blend.rt[0].alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: term2a = fbRGBA[3]; break; @@ -427,7 +427,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Combine Src/Dest RGB terms */ - switch (spu.blend.rgb_func) { + switch (spu.blend.rt[0].rgb_func) { case PIPE_BLEND_ADD: fragR = spu_add(term1r, term2r); fragG = spu_add(term1g, term2g); @@ -460,7 +460,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Combine Src/Dest A term */ - switch (spu.blend.alpha_func) { + switch (spu.blend.rt[0].alpha_func) { case PIPE_BLEND_ADD: fragA = spu_add(term1a, term2a); break; @@ -527,29 +527,29 @@ spu_fallback_fragment_ops(uint x, uint y, /* * Do color masking */ - if (spu.blend.colormask != 0xf) { + if (spu.blend.rt[0].colormask != 0xf) { uint cmask = 0x0; /* each byte corresponds to a color channel */ /* Form bitmask depending on color buffer format and colormask bits */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - if (spu.blend.colormask & PIPE_MASK_R) + if (spu.blend.rt[0].colormask & PIPE_MASK_R) cmask |= 0x00ff0000; /* red */ - if (spu.blend.colormask & PIPE_MASK_G) + if (spu.blend.rt[0].colormask & PIPE_MASK_G) cmask |= 0x0000ff00; /* green */ - if (spu.blend.colormask & PIPE_MASK_B) + if (spu.blend.rt[0].colormask & PIPE_MASK_B) cmask |= 0x000000ff; /* blue */ - if (spu.blend.colormask & PIPE_MASK_A) + if (spu.blend.rt[0].colormask & PIPE_MASK_A) cmask |= 0xff000000; /* alpha */ break; case PIPE_FORMAT_B8G8R8A8_UNORM: - if (spu.blend.colormask & PIPE_MASK_R) + if (spu.blend.rt[0].colormask & PIPE_MASK_R) cmask |= 0x0000ff00; /* red */ - if (spu.blend.colormask & PIPE_MASK_G) + if (spu.blend.rt[0].colormask & PIPE_MASK_G) cmask |= 0x00ff0000; /* green */ - if (spu.blend.colormask & PIPE_MASK_B) + if (spu.blend.rt[0].colormask & PIPE_MASK_B) cmask |= 0xff000000; /* blue */ - if (spu.blend.colormask & PIPE_MASK_A) + if (spu.blend.rt[0].colormask & PIPE_MASK_A) cmask |= 0x000000ff; /* alpha */ break; default: diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073ab..14987e3c3a 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a5..087963960d 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,8 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16) static const qword +fetch_shuffle_data[5] = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +111,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16) qword in[2 * 4]; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d39..3e9804bf8e 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; + PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16) unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]; struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16) unsigned char +immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void |