summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
authorKeith Whitwell <keithw@vmware.com>2009-01-09 10:08:06 +0000
committerKeith Whitwell <keithw@vmware.com>2009-01-09 10:08:06 +0000
commite3734593aea202e99e77febea7b86c904080939f (patch)
tree69856674e2062c55ec5eec95eb0e31225a943084 /src/gallium
parent221352bbd79a0ea92ce31cffb65537f62ee5668e (diff)
parent5cad143e545775acacee294049345c6a3868c51d (diff)
Merge commit 'origin/gallium-0.2' into gallium-xlib-rework
Conflicts: progs/glsl/Makefile
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c21
-rw-r--r--src/gallium/auxiliary/draw/draw_vbuf.h14
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c2
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc.c10
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump_c.c1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c2
-rw-r--r--src/gallium/auxiliary/util/SConscript1
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_context.c2
-rw-r--r--src/gallium/drivers/cell/ppu/cell_gen_fp.c1327
-rw-r--r--src/gallium/drivers/cell/ppu/cell_screen.c8
-rw-r--r--src/gallium/drivers/cell/spu/spu_shuffle.h186
-rw-r--r--src/gallium/drivers/cell/spu/spu_tri.c209
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c4
-rw-r--r--src/gallium/drivers/nv30/nv30_query.c4
-rw-r--r--src/gallium/drivers/nv30/nv30_state_emit.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_query.c4
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h4
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c42
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c95
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c2
-rw-r--r--src/gallium/drivers/softpipe/sp_context.h2
-rw-r--r--src/gallium/drivers/softpipe/sp_quad_fs.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_query.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_screen.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c51
-rw-r--r--src/gallium/drivers/trace/tr_context.c4
-rw-r--r--src/gallium/include/pipe/p_compiler.h31
-rw-r--r--src/gallium/include/pipe/p_context.h2
-rw-r--r--src/gallium/state_trackers/python/p_context.i2
-rw-r--r--src/gallium/winsys/gdi/SConscript2
-rw-r--r--src/gallium/winsys/gdi/gdi_softpipe_winsys.c19
35 files changed, 1227 insertions, 856 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index f34c68728e..03e842ce08 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -33,6 +33,7 @@
#include "draw_private.h"
#include "draw_pipe.h"
#include "draw_context.h"
+#include "draw_vbuf.h"
static boolean points( unsigned prim )
{
@@ -52,16 +53,28 @@ static boolean triangles( unsigned prim )
}
/**
- * Check if we need any special pipeline stages, or whether
- * prims/verts can go through untouched. Don't test for bypass
- * clipping or vs modes, this function is just about the primitive
- * pipeline stages.
+ * Default version of a function to check if we need any special
+ * pipeline stages, or whether prims/verts can go through untouched.
+ * Don't test for bypass clipping or vs modes, this function is just
+ * about the primitive pipeline stages.
+ *
+ * This can be overridden by the driver.
*/
boolean
draw_need_pipeline(const struct draw_context *draw,
const struct pipe_rasterizer_state *rasterizer,
unsigned int prim )
{
+ /* If the driver has overridden this, use that version:
+ */
+ if (draw->render &&
+ draw->render->need_pipeline)
+ {
+ return draw->render->need_pipeline( draw->render,
+ rasterizer,
+ prim );
+ }
+
/* Don't have to worry about triangles turning into lines/points
* and triggering the pipeline, because we have to trigger the
* pipeline *anyway* if unfilled mode is active.
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index b0aa2df309..7e1df88f0b 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -30,7 +30,7 @@
* Vertex buffer drawing stage.
*
* \author Keith Whitwell <keith@tungstengraphics.com>
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Jose Fonseca <jrfonsec@tungstengraphics.com>
*/
#ifndef DRAW_VBUF_H_
@@ -38,6 +38,7 @@
+struct pipe_rasterizer_state;
struct draw_context;
struct vertex_info;
@@ -55,6 +56,17 @@ struct vbuf_render {
unsigned max_vertex_buffer_bytes;
/**
+ * Query if the hardware driver needs assistance for a particular
+ * combination of rasterizer state and primitive.
+ *
+ * Currently optional.
+ */
+ boolean (*need_pipeline)(const struct vbuf_render *render,
+ const struct pipe_rasterizer_state *rasterizer,
+ unsigned int prim );
+
+
+ /**
* Get the hardware vertex format.
*
* XXX: have this in draw_context instead?
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index a943607d7e..c143cf2372 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -81,9 +81,9 @@ struct vertex_info
* memcmp() comparisons.
*/
struct {
- ubyte interp_mode:4; /**< INTERP_x */
- ubyte emit:4; /**< EMIT_x */
- ubyte src_index; /**< map to post-xform attribs */
+ unsigned interp_mode:4; /**< INTERP_x */
+ unsigned emit:4; /**< EMIT_x */
+ unsigned src_index:8; /**< map to post-xform attribs */
} attrib[PIPE_MAX_SHADER_INPUTS];
};
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 7f305304ff..c057cd67fd 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw,
const float (*constants)[4],
unsigned size )
{
- if (((unsigned)constants) & 0xf) {
+ if (((uintptr_t)constants) & 0xf) {
if (size > draw->vs.const_storage_size) {
if (draw->vs.aligned_constant_storage)
align_free((void *)draw->vs.aligned_constant_storage);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
index b65bfa7bbd..e9015ec2eb 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -261,7 +261,7 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
inst.inst.vB = vB;
inst.inst.op2 = op2;
emit_instruction(p, inst.bits);
-};
+}
union vxr_inst {
@@ -287,7 +287,7 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB)
inst.inst.rC = 0;
inst.inst.op2 = op2;
emit_instruction(p, inst.bits);
-};
+}
union va_inst {
@@ -313,7 +313,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC)
inst.inst.vC = vC;
inst.inst.op2 = op2;
emit_instruction(p, inst.bits);
-};
+}
union i_inst {
@@ -430,7 +430,7 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si)
inst.inst.ra = ra;
inst.inst.si = (unsigned) (si & 0xffff);
emit_instruction(p, inst.bits);
-};
+}
union a_inst {
@@ -459,7 +459,7 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
inst.inst.op2 = op2;
inst.inst.rc = rc;
emit_instruction(p, inst.bits);
-};
+}
union xo_inst {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index c2a0ac5aff..2ed8c2bf07 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -180,14 +180,16 @@ _dump_register_ind(
uint file,
int index,
uint ind_file,
- int ind_index )
+ int ind_index,
+ uint ind_swizzle )
{
ENM( file, file_names );
CHR( '[' );
ENM( ind_file, file_names );
CHR( '[' );
SID( ind_index );
- CHR( ']' );
+ TXT( "]." );
+ ENM( ind_swizzle, swizzle_names );
if (index != 0) {
if (index > 0)
CHR( '+' );
@@ -385,7 +387,8 @@ iter_instruction(
src->SrcRegister.File,
src->SrcRegister.Index,
src->SrcRegisterInd.File,
- src->SrcRegisterInd.Index );
+ src->SrcRegisterInd.Index,
+ src->SrcRegisterInd.SwizzleX );
}
else {
_dump_register(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index be25cb45a0..c575b6c3e1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -646,7 +646,6 @@ tgsi_dump_c(
struct tgsi_full_declaration fd;
uint ignored = flags & TGSI_DUMP_C_IGNORED;
uint deflt = flags & TGSI_DUMP_C_DEFAULT;
- uint instno = 0;
tgsi_parse_init( &parse, tokens );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index cfc7ea8e89..1239f6c076 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -124,7 +124,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
/* only first 32 regs will appear in this bitfield */
info->file_mask[file] |= (1 << reg);
info->file_count[file]++;
- info->file_max[file] = MAX2(info->file_max[file], (int)i);
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
if (file == TGSI_FILE_INPUT) {
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
index f5bd308083..5df932c0f7 100644
--- a/src/gallium/auxiliary/util/SConscript
+++ b/src/gallium/auxiliary/util/SConscript
@@ -23,6 +23,7 @@ util = env.ConvenienceLibrary(
'u_stream_wd.c',
'u_tile.c',
'u_time.c',
+ 'u_timed_winsys.c',
])
auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 30f161fe3b..b5eb896b7a 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1198,7 +1198,7 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
{
struct pipe_context *pipe = ctx->pipe;
struct pipe_screen *screen = pipe->screen;
- uint dstLevel, zslice;
+ uint dstLevel, zslice = 0;
assert(pt->block.width == 1);
assert(pt->block.height == 1);
diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
index 22d552d8e3..8f502823f9 100644
--- a/src/gallium/drivers/cell/ppu/cell_context.c
+++ b/src/gallium/drivers/cell/ppu/cell_context.c
@@ -162,7 +162,7 @@ cell_create_context(struct pipe_screen *screen,
*/
/* This call only works with SDK 3.0. Anyone still using 2.1??? */
cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
- cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0);
+ cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
if (cell->debug_flags) {
printf("Cell: found %d Cell(s) with %u SPUs\n",
cell->num_cells, cell->num_spus);
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 96a1743fc1..8f3deb482e 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -2,6 +2,7 @@
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
@@ -75,13 +76,25 @@ struct codegen
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
+ int addr_reg; /**< address register, integer values */
+
/** Per-instruction temps / intermediate temps */
int num_itemps;
int itemps[12];
/** Current IF/ELSE/ENDIF nesting level */
int if_nesting;
- /** Index of execution mask register */
+ /** Current BGNLOOP/ENDLOOP nesting level */
+ int loop_nesting;
+ /** Location of start of current loop */
+ int loop_start;
+
+ /** Index of if/conditional mask register */
+ int cond_mask_reg;
+ /** Index of loop mask register */
+ int loop_mask_reg;
+
+ /** Index of master execution mask register */
int exec_mask_reg;
/** KIL mask: indicates which fragments have been killed */
@@ -145,10 +158,33 @@ get_const_one_reg(struct codegen *gen)
/**
- * Return index of the pixel execution mask.
+ * Return index of the address register.
+ * Used for indirect register loads/stores.
+ */
+static int
+get_address_reg(struct codegen *gen)
+{
+ if (gen->addr_reg <= 0) {
+ gen->addr_reg = spe_allocate_available_register(gen->f);
+
+ spe_indent(gen->f, 4);
+ spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
+
+ /* init addr = {0, 0, 0, 0} */
+ spe_zero(gen->f, gen->addr_reg);
+
+ spe_indent(gen->f, -4);
+ }
+
+ return gen->addr_reg;
+}
+
+
+/**
+ * Return index of the master execution mask.
* The register is allocated an initialized upon the first call.
*
- * The pixel execution mask controls which pixels in a quad are
+ * The master execution mask controls which pixels in a quad are
* modified, according to surrounding conditionals, loops, etc.
*/
static int
@@ -157,19 +193,40 @@ get_exec_mask_reg(struct codegen *gen)
if (gen->exec_mask_reg <= 0) {
gen->exec_mask_reg = spe_allocate_available_register(gen->f);
- spe_indent(gen->f, 4);
- spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
-
- /* exec_mask = {~0, ~0, ~0, ~0} */
+ /* XXX this may not be needed */
+ spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
spe_load_int(gen->f, gen->exec_mask_reg, ~0);
-
- spe_indent(gen->f, -4);
}
return gen->exec_mask_reg;
}
+/** Return index of the conditional (if/else) execution mask register */
+static int
+get_cond_mask_reg(struct codegen *gen)
+{
+ if (gen->cond_mask_reg <= 0) {
+ gen->cond_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->cond_mask_reg;
+}
+
+
+/** Return index of the loop execution mask register */
+static int
+get_loop_mask_reg(struct codegen *gen)
+{
+ if (gen->loop_mask_reg <= 0) {
+ gen->loop_mask_reg = spe_allocate_available_register(gen->f);
+ }
+
+ return gen->loop_mask_reg;
+}
+
+
+
static boolean
is_register_src(struct codegen *gen, int channel,
const struct tgsi_full_src_register *src)
@@ -231,16 +288,22 @@ get_src_reg(struct codegen *gen,
spe_xor(gen->f, reg, reg, reg);
}
else {
+ int index = src->SrcRegister.Index;
+
assert(swizzle < 4);
+ if (src->SrcRegister.Indirect) {
+ /* XXX unfinished */
+ }
+
switch (src->SrcRegister.File) {
case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
+ reg = gen->temp_regs[index][swizzle];
break;
case TGSI_FILE_INPUT:
{
/* offset is measured in quadwords, not bytes */
- int offset = src->SrcRegister.Index * 4 + swizzle;
+ int offset = index * 4 + swizzle;
reg = get_itemp(gen);
reg_is_itemp = TRUE;
/* Load: reg = memory[(machine_reg) + offset] */
@@ -248,12 +311,12 @@ get_src_reg(struct codegen *gen,
}
break;
case TGSI_FILE_IMMEDIATE:
- reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
+ reg = gen->imm_regs[index][swizzle];
break;
case TGSI_FILE_CONSTANT:
{
/* offset is measured in quadwords, not bytes */
- int offset = src->SrcRegister.Index * 4 + swizzle;
+ int offset = index * 4 + swizzle;
reg = get_itemp(gen);
reg_is_itemp = TRUE;
/* Load: reg = memory[(machine_reg) + offset] */
@@ -322,7 +385,7 @@ get_dst_reg(struct codegen *gen,
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- if (gen->if_nesting > 0)
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0)
reg = get_itemp(gen);
else
reg = gen->temp_regs[dest->DstRegister.Index][channel];
@@ -367,7 +430,7 @@ store_dest_reg(struct codegen *gen,
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
- if (gen->if_nesting > 0) {
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
int exec_reg = get_exec_mask_reg(gen);
/* Mix d with new value according to exec mask:
@@ -384,7 +447,7 @@ store_dest_reg(struct codegen *gen,
{
/* offset is measured in quadwords, not bytes */
int offset = dest->DstRegister.Index * 4 + channel;
- if (gen->if_nesting > 0) {
+ if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
int exec_reg = get_exec_mask_reg(gen);
int curval_reg = get_itemp(gen);
/* First read the current value from memory:
@@ -488,96 +551,118 @@ emit_epilogue(struct codegen *gen)
}
+#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
+ for (ch = 0; ch < 4; ch++) \
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch))
+
+
+static boolean
+emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch = 0, src_reg, addr_reg;
+
+ spe_comment(gen->f, -4, "ARL:");
+
+ src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ addr_reg = get_address_reg(gen);
+
+ /* convert float to int */
+ spe_cflts(gen->f, addr_reg, src_reg, 0);
+
+ free_itemps(gen);
+
+ return TRUE;
+}
+
+
static boolean
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch, src_reg[4], dst_reg[4];
spe_comment(gen->f, -4, "MOV:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
- is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
- /* special-case: register to memory store */
- store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
- }
- else {
- spe_move(gen->f, dst_reg[ch], src_reg[ch]);
- store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
- }
- free_itemps(gen);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
+ is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
+ /* special-case: register to memory store */
+ store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+ else {
+ spe_move(gen->f, dst_reg[ch], src_reg[ch]);
+ store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
}
}
- return true;
+
+ free_itemps(gen);
+
+ return TRUE;
}
/**
- * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
- * becomes (up to) four SPU "fa" instructions because we're doing SOA
- * processing.
+ * Emit binary operation
*/
static boolean
-emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch, s1_reg[4], s2_reg[4], d_reg[4];
- spe_comment(gen->f, -4, "ADD:");
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
+ spe_comment(gen->f, -4, "ADD:");
+ break;
+ case TGSI_OPCODE_SUB:
+ spe_comment(gen->f, -4, "SUB:");
+ break;
+ case TGSI_OPCODE_MUL:
+ spe_comment(gen->f, -4, "MUL:");
+ break;
+ default:
+ assert(0);
+ }
+
/* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
- for (ch = 0; ch < 4; ch++) {
- /* If the dest R, G, B or A writemask is enabled... */
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
}
- /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* Emit actual SPE instruction: d = s1 + s2 */
+
+ /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* Emit actual SPE instruction: d = s1 + s2 */
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ADD:
spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- /* Free any intermediate temps we allocated */
- free_itemps(gen);
+ break;
+ case TGSI_OPCODE_SUB:
+ spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_MUL:
+ spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ default:
+ ;
}
}
- return true;
-}
-/**
- * Emit subtract. See emit_ADD for comments.
- */
-static boolean
-emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], s2_reg[4], d_reg[4];
- spe_comment(gen->f, -4, "SUB:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- }
+ /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* d = s1 - s2 */
- spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
- }
- return true;
+
+ /* Free any intermediate temps we allocated */
+ free_itemps(gen);
+
+ return TRUE;
}
+
/**
* Emit multiply add. See emit_ADD for comments.
*/
@@ -586,23 +671,20 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
spe_comment(gen->f, -4, "MAD:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* d = s1 * s2 + s3 */
- spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
- return true;
+ free_itemps(gen);
+ return TRUE;
}
@@ -615,132 +697,108 @@ emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
spe_comment(gen->f, -4, "LERP:");
/* setup/get src/dst/temp regs */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
}
/* d = s3 + s1(s2 - s3) */
/* do all subtracts, then all fma, then all stores to better pipeline */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
free_itemps(gen);
- return true;
+ return TRUE;
}
+
+
/**
- * Emit multiply. See emit_ADD for comments.
+ * Emit reciprocal or recip sqrt.
*/
static boolean
-emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch, s1_reg[4], s2_reg[4], d_reg[4];
- spe_comment(gen->f, -4, "MUL:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- }
+ int ch, s1_reg[4], d_reg[4], tmp_reg[4];
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
+ spe_comment(gen->f, -4, "RCP:");
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- /* d = s1 * s2 */
- spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ else {
+ assert(inst->Instruction.Opcode == TGSI_OPCODE_RSQ);
+ spe_comment(gen->f, -4, "RSQ:");
}
- return true;
-}
-/**
- * Emit reciprocal. See emit_ADD for comments.
- */
-static boolean
-emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
- spe_comment(gen->f, -4, "RCP:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* d = 1/s1 */
- spe_frest(gen->f, d_reg, s1_reg);
- spe_fi(gen->f, d_reg, s1_reg, d_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
}
- return true;
-}
-/**
- * Emit reciprocal sqrt. See emit_ADD for comments.
- */
-static boolean
-emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
- spe_comment(gen->f, -4, "RSQ:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* d = 1/s1 */
- spe_frsqest(gen->f, d_reg, s1_reg);
- spe_fi(gen->f, d_reg, s1_reg, d_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
+ /* tmp = 1/s1 */
+ spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
+ }
+ else {
+ /* tmp = 1/sqrt(s1) */
+ spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
}
}
- return true;
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* d = float_interp(s1, tmp) */
+ spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
}
+
/**
* Emit absolute value. See emit_ADD for comments.
*/
static boolean
emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
+ int ch, s1_reg[4], d_reg[4];
+ const int bit31mask_reg = get_itemp(gen);
+
spe_comment(gen->f, -4, "ABS:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- const int bit31mask_reg = get_itemp(gen);
- /* mask with bit 31 set, the rest cleared */
- spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
+ /* mask with bit 31 set, the rest cleared */
+ spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
- /* d = sign bit cleared in s1 */
- spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ /* d = sign bit cleared in s1 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
}
- return true;
+
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen);
+ return TRUE;
}
/**
@@ -775,16 +833,14 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* t0 = t0 + t1 */
spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- spe_move(gen->f, d_reg, t0_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
}
free_itemps(gen);
- return true;
+ return TRUE;
}
/**
@@ -824,16 +880,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* t0 = t0 + t1 */
spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- spe_move(gen->f, d_reg, t0_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, t0_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
}
free_itemps(gen);
- return true;
+ return TRUE;
}
/**
@@ -867,16 +921,14 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* t = w1 + t */
spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- spe_move(gen->f, d_reg, tmp_reg);
- store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ spe_move(gen->f, d_reg, tmp_reg);
+ store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
free_itemps(gen);
- return true;
+ return TRUE;
}
/**
@@ -911,17 +963,15 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_frsqest(gen->f, t1_reg, t0_reg);
spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
- for (ch = 0; ch < 3; ch++) { /* NOTE: omit W channel */
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- /* dst = src[ch] * t1 */
- spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ /* dst = src[ch] * t1 */
+ spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
}
free_itemps(gen);
- return true;
+ return TRUE;
}
@@ -978,201 +1028,101 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
free_itemps(gen);
- return true;
+ return TRUE;
}
+
/**
- * Emit set-if-greater-than.
+ * Emit inequality instruction.
* Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
* the result but OpenGL/TGSI needs 0.0 and 1.0 results.
* We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
*/
static boolean
-emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
+ int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
+ bool complement = FALSE;
- spe_comment(gen->f, -4, "SGT:");
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 > s2) */
- spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = d & one_reg */
- spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_SGT:
+ spe_comment(gen->f, -4, "SGT:");
+ break;
+ case TGSI_OPCODE_SLT:
+ spe_comment(gen->f, -4, "SLT:");
+ break;
+ case TGSI_OPCODE_SGE:
+ spe_comment(gen->f, -4, "SGE:");
+ complement = TRUE;
+ break;
+ case TGSI_OPCODE_SLE:
+ spe_comment(gen->f, -4, "SLE:");
+ complement = TRUE;
+ break;
+ case TGSI_OPCODE_SEQ:
+ spe_comment(gen->f, -4, "SEQ:");
+ break;
+ case TGSI_OPCODE_SNE:
+ spe_comment(gen->f, -4, "SNE:");
+ complement = TRUE;
+ break;
+ default:
+ ;
}
- return true;
-}
+ one_reg = get_const_one_reg(gen);
-/**
- * Emit set-if_less-then. See emit_SGT for comments.
- */
-static boolean
-emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
-
- spe_comment(gen->f, -4, "SLT:");
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 < s2) */
- spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = d & one_reg */
- spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
}
- return true;
-}
-
-/**
- * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
- */
-static boolean
-emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
-
- spe_comment(gen->f, -4, "SGE:");
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 >= s2) */
- spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = ~d & one_reg */
- spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_SGT:
+ spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SLT:
+ spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
+ break;
+ case TGSI_OPCODE_SGE:
+ spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
+ break;
+ case TGSI_OPCODE_SLE:
+ spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SEQ:
+ spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ case TGSI_OPCODE_SNE:
+ spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
+ break;
+ default:
+ assert(0);
}
}
- return true;
-}
-
-/**
- * Emit set-if_less-then-or-equal. See emit_SGT for comments.
- */
-static boolean
-emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
-
- spe_comment(gen->f, -4, "SLE:");
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 <= s2) */
- spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = ~d & one_reg */
- spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ /* convert d from 0x0/0xffffffff to 0.0/1.0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ /* d = d & one_reg */
+ if (complement)
+ spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
+ else
+ spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
}
- return true;
-}
-
-/**
- * Emit set-if_equal. See emit_SGT for comments.
- */
-static boolean
-emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
-
- spe_comment(gen->f, -4, "SEQ:");
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 == s2) */
- spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = d & one_reg */
- spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
- return true;
+ free_itemps(gen);
+ return TRUE;
}
-/**
- * Emit set-if_not_equal. See emit_SGT for comments.
- */
-static boolean
-emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
-
- spe_comment(gen->f, -4, "SNE:");
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
-
- /* d = (s1 != s2) */
- spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
- spe_nor(gen->f, d_reg, d_reg, d_reg);
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- /* d = d & one_reg */
- spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
-
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
- }
-
- return true;
-}
/**
- * Emit compare. See emit_SGT for comments.
+ * Emit compare.
*/
static boolean
emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
@@ -1181,26 +1131,24 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_comment(gen->f, -4, "CMP:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- int zero_reg = get_itemp(gen);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ int zero_reg = get_itemp(gen);
- spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+ spe_zero(gen->f, zero_reg);
- /* d = (s1 < 0) ? s2 : s3 */
- spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
- spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
+ /* d = (s1 < 0) ? s2 : s3 */
+ spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
}
- return true;
+ return TRUE;
}
/**
@@ -1211,29 +1159,34 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
+ int ch, s1_reg[4], d_reg[4];
spe_comment(gen->f, -4, "TRUNC:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
- /* Convert float to int */
- spe_cflts(gen->f, d_reg, s1_reg, 0);
+ /* Convert float to int */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
+ }
- /* Convert int to float */
- spe_csflt(gen->f, d_reg, d_reg, 0);
+ /* Convert int to float */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
+ }
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
- return true;
+ free_itemps(gen);
+ return TRUE;
}
+
/**
* Emit floor.
* If negative int subtract one
@@ -1243,77 +1196,103 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
static boolean
emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
+ int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
spe_comment(gen->f, -4, "FLR:");
- int zero_reg = get_itemp(gen);
- spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+ zero_reg = get_itemp(gen);
+ spe_zero(gen->f, zero_reg);
+ one_reg = get_const_one_reg(gen);
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- int tmp_reg = get_itemp(gen);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
- /* If negative, subtract 1.0 */
- spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
- spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
- spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
+ /* If negative, subtract 1.0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
- /* Convert float to int */
- spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
+ /* Convert float to int */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
+ }
- /* Convert int to float */
- spe_csflt(gen->f, d_reg, tmp_reg, 0);
+ /* Convert int to float */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
+ }
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
- return true;
+ free_itemps(gen);
+ return TRUE;
}
+
/**
* Compute frac = Input - FLR(Input)
*/
static boolean
emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- int ch;
+ int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
spe_comment(gen->f, -4, "FRC:");
- int zero_reg = get_itemp(gen);
- spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+ zero_reg = get_itemp(gen);
+ spe_zero(gen->f, zero_reg);
+ one_reg = get_const_one_reg(gen);
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- int tmp_reg = get_itemp(gen);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
+ }
- /* If negative, subtract 1.0 */
- spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
- spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
- spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
+ /* If negative, subtract 1.0 */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
- /* Convert float to int */
- spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
+ /* Convert float to int */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
+ }
- /* Convert int to float */
- spe_csflt(gen->f, tmp_reg, tmp_reg, 0);
+ /* Convert int to float */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
+ }
- /* d = s1 - FLR(s1) */
- spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
+ /* d = s1 - FLR(s1) */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
+ }
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ /* store result */
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
- return true;
+ free_itemps(gen);
+ return TRUE;
}
@@ -1379,73 +1358,71 @@ emit_function_call(struct codegen *gen,
retval_reg = spe_allocate_available_register(gen->f);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int d_reg;
- ubyte usedRegs[SPE_NUM_REGS];
- uint i, numUsed;
-
- if (!scalar) {
- for (a = 0; a < num_args; a++) {
- s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int d_reg;
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ if (!scalar) {
+ for (a = 0; a < num_args; a++) {
+ s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
}
+ }
- d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- if (!scalar || !func_called) {
- /* for a scalar function, we'll really only call the function once */
+ if (!scalar || !func_called) {
+ /* for a scalar function, we'll really only call the function once */
- numUsed = spe_get_registers_used(gen->f, usedRegs);
- assert(numUsed < gen->frame_size / 16 - 2);
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 2);
- /* save registers to stack */
- for (i = 0; i < numUsed; i++) {
- uint reg = usedRegs[i];
- int offset = 2 + i;
- spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
- }
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
- /* setup function arguments */
- for (a = 0; a < num_args; a++) {
- spe_move(gen->f, 3 + a, s_regs[a]);
- }
+ /* setup function arguments */
+ for (a = 0; a < num_args; a++) {
+ spe_move(gen->f, 3 + a, s_regs[a]);
+ }
- /* branch to function, save return addr */
- spe_brasl(gen->f, SPE_REG_RA, addr);
-
- /* save function's return value */
- if (scalar)
- spe_move(gen->f, retval_reg, 3);
- else
- spe_move(gen->f, d_reg, 3);
-
- /* restore registers from stack */
- for (i = 0; i < numUsed; i++) {
- uint reg = usedRegs[i];
- if (reg != d_reg && reg != retval_reg) {
- int offset = 2 + i;
- spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
- }
- }
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
- func_called = TRUE;
- }
+ /* save function's return value */
+ if (scalar)
+ spe_move(gen->f, retval_reg, 3);
+ else
+ spe_move(gen->f, d_reg, 3);
- if (scalar) {
- spe_move(gen->f, d_reg, retval_reg);
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_reg && reg != retval_reg) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
}
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
+ func_called = TRUE;
+ }
+
+ if (scalar) {
+ spe_move(gen->f, d_reg, retval_reg);
}
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
}
if (scalar) {
spe_release_register(gen->f, retval_reg);
}
- return true;
+ return TRUE;
}
@@ -1525,11 +1502,9 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
}
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
- free_itemps(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
}
return TRUE;
@@ -1549,35 +1524,31 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
/* zero = {0,0,0,0} */
zero_reg = get_itemp(gen);
- spe_load_uint(gen->f, zero_reg, 0);
+ spe_zero(gen->f, zero_reg);
cmp_reg = get_itemp(gen);
/* get src regs */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
}
/* test if any src regs are < 0 */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- if (kil_reg >= 0) {
- /* cmp = 0 > src ? : ~0 : 0 */
- spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
- /* kil = kil | cmp */
- spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
- }
- else {
- kil_reg = get_itemp(gen);
- /* kil = 0 > src ? : ~0 : 0 */
- spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (kil_reg >= 0) {
+ /* cmp = 0 > src ? : ~0 : 0 */
+ spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
+ /* kil = kil | cmp */
+ spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
+ }
+ else {
+ kil_reg = get_itemp(gen);
+ /* kil = 0 > src ? : ~0 : 0 */
+ spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
}
}
- if (gen->if_nesting) {
+ if (gen->if_nesting || gen->loop_nesting) {
/* may have been a conditional kil */
spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
}
@@ -1599,96 +1570,92 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
/**
- * Emit max. See emit_SGT for comments.
+ * Emit min or max.
*/
static boolean
-emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
spe_comment(gen->f, -4, "MAX:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
+ d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ tmp_reg[ch] = get_itemp(gen);
}
/* d = (s0 > s1) ? s0 : s1 */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
- }
+ else
+ spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- }
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
}
free_itemps(gen);
- return true;
+ return TRUE;
}
+
/**
- * Emit max. See emit_SGT for comments.
+ * Emit code to update the execution mask.
+ * This needs to be done whenever the execution status of a conditional
+ * or loop is changed.
*/
-static boolean
-emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
+static void
+emit_update_exec_mask(struct codegen *gen)
{
- int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
+ const int exec_reg = get_exec_mask_reg(gen);
+ const int cond_reg = gen->cond_mask_reg;
+ const int loop_reg = gen->loop_mask_reg;
- spe_comment(gen->f, -4, "MIN:");
+ spe_comment(gen->f, 0, "Update master execution mask");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
+ if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
+ /* exec_mask = cond_mask & loop_mask */
+ assert(cond_reg > 0);
+ assert(loop_reg > 0);
+ spe_and(gen->f, exec_reg, cond_reg, loop_reg);
}
-
- /* d = (s1 > s0) ? s0 : s1 */
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
- }
+ else if (gen->if_nesting > 0) {
+ assert(cond_reg > 0);
+ spe_move(gen->f, exec_reg, cond_reg);
}
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
- }
+ else if (gen->loop_nesting > 0) {
+ assert(loop_reg > 0);
+ spe_move(gen->f, exec_reg, loop_reg);
}
-
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
- }
+ else {
+ spe_load_int(gen->f, exec_reg, ~0x0);
}
-
- free_itemps(gen);
- return true;
}
+
static boolean
emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
const int channel = 0;
- const int exec_reg = get_exec_mask_reg(gen);
+ int cond_reg;
spe_comment(gen->f, -4, "IF:");
- /* update execution mask with the predicate register */
+ cond_reg = get_cond_mask_reg(gen);
+
+ /* XXX push cond exec mask */
+
+ spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
+ spe_load_int(gen->f, cond_reg, ~0);
+
+ /* update conditional execution mask with the predicate register */
int tmp_reg = get_itemp(gen);
int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
@@ -1696,44 +1663,126 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
/* tmp = !tmp */
spe_complement(gen->f, tmp_reg, tmp_reg);
- /* exec_mask = exec_mask & tmp */
- spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
+ /* cond_mask = cond_mask & tmp */
+ spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
gen->if_nesting++;
+ /* update the master execution mask */
+ emit_update_exec_mask(gen);
+
free_itemps(gen);
- return true;
+ return TRUE;
}
static boolean
emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
- const int exec_reg = get_exec_mask_reg(gen);
+ const int cond_reg = get_cond_mask_reg(gen);
spe_comment(gen->f, -4, "ELSE:");
- /* exec_mask = !exec_mask */
- spe_complement(gen->f, exec_reg, exec_reg);
+ spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
+ spe_complement(gen->f, cond_reg, cond_reg);
+ emit_update_exec_mask(gen);
- return true;
+ return TRUE;
}
static boolean
emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
+ spe_comment(gen->f, -4, "ENDIF:");
+
+ /* XXX todo: pop cond exec mask */
+
+ gen->if_nesting--;
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int exec_reg, loop_reg;
+
+ spe_comment(gen->f, -4, "BGNLOOP:");
+
+ exec_reg = get_exec_mask_reg(gen);
+ loop_reg = get_loop_mask_reg(gen);
+
+ /* XXX push loop_exec mask */
+
+ spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
+ spe_load_int(gen->f, loop_reg, ~0x0);
+
+ gen->loop_nesting++;
+ gen->loop_start = spe_code_size(gen->f); /* in bytes */
+
+ return TRUE;
+}
+
+
+static boolean
+emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const int loop_reg = get_loop_mask_reg(gen);
+ const int tmp_reg = get_itemp(gen);
+ int offset;
+
+ spe_comment(gen->f, -4, "ENDLOOP:");
+
+ /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */
+ spe_orx(gen->f, tmp_reg, loop_reg);
+
+ offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */
+
+ /* branch back to top of loop if tmp_reg != 0 */
+ spe_brnz(gen->f, tmp_reg, offset / 4);
+
+ /* XXX pop loop_exec mask */
+
+ gen->loop_nesting--;
+
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
const int exec_reg = get_exec_mask_reg(gen);
+ const int loop_reg = get_loop_mask_reg(gen);
- spe_comment(gen->f, -4, "ENDIF:");
+ spe_comment(gen->f, -4, "BREAK:");
- /* XXX todo: pop execution mask */
+ assert(gen->loop_nesting > 0);
- spe_load_int(gen->f, exec_reg, ~0x0);
+ spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
+ spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
- gen->if_nesting--;
- return true;
+ emit_update_exec_mask(gen);
+
+ return TRUE;
+}
+
+
+static boolean
+emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ spe_comment(gen->f, -4, "CONT:");
+
+ assert(gen->loop_nesting > 0);
+
+ return TRUE;
}
@@ -1745,28 +1794,26 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
- for (ch = 0; ch < 4; ch++) {
- if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ FOR_EACH_ENABLED_CHANNEL(inst, ch) {
+ int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
- int t1_reg = get_itemp(gen);
- int t2_reg = get_itemp(gen);
+ int t1_reg = get_itemp(gen);
+ int t2_reg = get_itemp(gen);
- spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
- if (ddx) {
- spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
- }
- else {
- spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
- }
- spe_fs(gen->f, d_reg, t2_reg, t1_reg);
-
- free_itemps(gen);
+ spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
+ if (ddx) {
+ spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
+ }
+ else {
+ spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
}
+ spe_fs(gen->f, d_reg, t2_reg, t1_reg);
+
+ free_itemps(gen);
}
- return true;
+ return TRUE;
}
@@ -1784,7 +1831,7 @@ emit_END(struct codegen *gen)
{
spe_comment(gen->f, -4, "END:");
emit_epilogue(gen);
- return true;
+ return TRUE;
}
@@ -1796,15 +1843,15 @@ emit_instruction(struct codegen *gen,
const struct tgsi_full_instruction *inst)
{
switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ return emit_ARL(gen, inst);
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
return emit_MOV(gen, inst);
- case TGSI_OPCODE_MUL:
- return emit_MUL(gen, inst);
case TGSI_OPCODE_ADD:
- return emit_ADD(gen, inst);
case TGSI_OPCODE_SUB:
- return emit_SUB(gen, inst);
+ case TGSI_OPCODE_MUL:
+ return emit_binop(gen, inst);
case TGSI_OPCODE_MAD:
return emit_MAD(gen, inst);
case TGSI_OPCODE_LERP:
@@ -1820,29 +1867,22 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_XPD:
return emit_XPD(gen, inst);
case TGSI_OPCODE_RCP:
- return emit_RCP(gen, inst);
case TGSI_OPCODE_RSQ:
- return emit_RSQ(gen, inst);
+ return emit_RCP_RSQ(gen, inst);
case TGSI_OPCODE_ABS:
return emit_ABS(gen, inst);
case TGSI_OPCODE_SGT:
- return emit_SGT(gen, inst);
case TGSI_OPCODE_SLT:
- return emit_SLT(gen, inst);
case TGSI_OPCODE_SGE:
- return emit_SGE(gen, inst);
case TGSI_OPCODE_SLE:
- return emit_SLE(gen, inst);
case TGSI_OPCODE_SEQ:
- return emit_SEQ(gen, inst);
case TGSI_OPCODE_SNE:
- return emit_SNE(gen, inst);
+ return emit_inequality(gen, inst);
case TGSI_OPCODE_CMP:
return emit_CMP(gen, inst);
- case TGSI_OPCODE_MAX:
- return emit_MAX(gen, inst);
case TGSI_OPCODE_MIN:
- return emit_MIN(gen, inst);
+ case TGSI_OPCODE_MAX:
+ return emit_MIN_MAX(gen, inst);
case TGSI_OPCODE_TRUNC:
return emit_TRUNC(gen, inst);
case TGSI_OPCODE_FLR:
@@ -1882,20 +1922,29 @@ emit_instruction(struct codegen *gen,
case TGSI_OPCODE_ENDIF:
return emit_ENDIF(gen, inst);
+ case TGSI_OPCODE_BGNLOOP2:
+ return emit_BGNLOOP(gen, inst);
+ case TGSI_OPCODE_ENDLOOP2:
+ return emit_ENDLOOP(gen, inst);
+ case TGSI_OPCODE_BRK:
+ return emit_BRK(gen, inst);
+ case TGSI_OPCODE_CONT:
+ return emit_CONT(gen, inst);
+
case TGSI_OPCODE_DDX:
- return emit_DDX_DDY(gen, inst, true);
+ return emit_DDX_DDY(gen, inst, TRUE);
case TGSI_OPCODE_DDY:
- return emit_DDX_DDY(gen, inst, false);
+ return emit_DDX_DDY(gen, inst, FALSE);
/* XXX lots more cases to do... */
default:
fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
inst->Instruction.Opcode);
- return false;
+ return FALSE;
}
- return true;
+ return TRUE;
}
@@ -1923,10 +1972,14 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
}
else {
+ char str[100];
int reg = spe_allocate_available_register(gen->f);
if (reg < 0)
- return false;
+ return FALSE;
+
+ sprintf(str, "init $%d = %f", reg, val);
+ spe_comment(gen->f, 0, str);
/* update immediate map */
gen->imm_regs[gen->num_imm][ch] = reg;
@@ -1938,7 +1991,7 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
gen->num_imm++;
- return true;
+ return TRUE;
}
@@ -1963,7 +2016,7 @@ emit_declaration(struct cell_context *cell,
for (ch = 0; ch < 4; ch++) {
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
if (gen->temp_regs[i][ch] < 0)
- return false; /* out of regs */
+ return FALSE; /* out of regs */
}
/* XXX if we run out of SPE registers, we need to spill
@@ -1983,7 +2036,7 @@ emit_declaration(struct cell_context *cell,
; /* ignore */
}
- return true;
+ return TRUE;
}
@@ -2019,7 +2072,7 @@ cell_gen_fragment_program(struct cell_context *cell,
spe_allocate_register(f, gen.constants_reg);
if (cell->debug_flags & CELL_DEBUG_ASM) {
- spe_print_code(f, true);
+ spe_print_code(f, TRUE);
spe_indent(f, 8);
printf("Begin %s\n", __FUNCTION__);
tgsi_dump(tokens, 0);
@@ -2035,17 +2088,17 @@ cell_gen_fragment_program(struct cell_context *cell,
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
- gen.error = true;
+ gen.error = TRUE;
break;
case TGSI_TOKEN_TYPE_DECLARATION:
if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
- gen.error = true;
+ gen.error = TRUE;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
- gen.error = true;
+ gen.error = TRUE;
break;
default:
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c
index d223557950..6fc2257e2a 100644
--- a/src/gallium/drivers/cell/ppu/cell_screen.c
+++ b/src/gallium/drivers/cell/ppu/cell_screen.c
@@ -81,8 +81,12 @@ cell_get_param(struct pipe_screen *screen, int param)
return 8; /* max 128x128x128 */
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return CELL_MAX_TEXTURE_LEVELS;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1; /* XXX not really true */
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 0; /* XXX to do */
default:
- return 10;
+ return 0;
}
}
@@ -108,7 +112,7 @@ cell_get_paramf(struct pipe_screen *screen, int param)
return 16.0; /* arbitrary */
default:
- return 10;
+ return 0;
}
}
diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h
new file mode 100644
index 0000000000..7cbdb814d2
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_shuffle.h
@@ -0,0 +1,186 @@
+#ifndef SPU_SHUFFLE_H
+#define SPU_SHUFFLE_H
+
+/*
+ * Generate shuffle patterns with minimal fuss.
+ *
+ * Based on ideas from
+ * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf
+ *
+ * A-P indicates 0-15th position in first vector
+ * a-p indicates 0-15th position in second vector
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | A| B| C| D|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | A| B| C| D| E| F| G| H|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ *
+ * x or X indicates 0xff
+ * 8 indicates 0x80
+ * 0 indicates 0x00
+ *
+ * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector
+ * unsigned char literal suitable for use with spu_shuffle().
+ *
+ * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector
+ * literal suitable for use with si_shufb().
+ *
+ *
+ * For example :
+ * SHUFB4(A,A,A,A)
+ * expands to :
+ * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3})
+ *
+ * SHUFFLE8(A,B,a,b,C,c,8,8)
+ * expands to :
+ * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,
+ * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0})
+ *
+ */
+
+#include <spu_intrinsics.h>
+
+#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03
+#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07
+#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b
+#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f
+#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13
+#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17
+#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b
+#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f
+#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0
+#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0
+#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80
+#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0
+
+#define SHUFFLE_VECTOR_4__(A, B, C, D) \
+ SHUFFLE_PATTERN_4_##A##__, \
+ SHUFFLE_PATTERN_4_##B##__, \
+ SHUFFLE_PATTERN_4_##C##__, \
+ SHUFFLE_PATTERN_4_##D##__
+
+#define SHUFFLE4(A, B, C, D) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_4__(A, B, C, D) \
+ })
+
+#define SHUFB4(A, B, C, D) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_4__(A, B, C, D) \
+ })
+
+
+#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01
+#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03
+#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05
+#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07
+#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09
+#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b
+#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d
+#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f
+#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11
+#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13
+#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15
+#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17
+#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19
+#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b
+#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d
+#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f
+#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0
+#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0
+#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80
+#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0
+
+
+#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ SHUFFLE_PATTERN_8_##A##__, \
+ SHUFFLE_PATTERN_8_##B##__, \
+ SHUFFLE_PATTERN_8_##C##__, \
+ SHUFFLE_PATTERN_8_##D##__, \
+ SHUFFLE_PATTERN_8_##E##__, \
+ SHUFFLE_PATTERN_8_##F##__, \
+ SHUFFLE_PATTERN_8_##G##__, \
+ SHUFFLE_PATTERN_8_##H##__
+
+#define SHUFFLE8(A, B, C, D, E, F, G, H) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ })
+
+#define SHUFB8(A, B, C, D, E, F, G, H) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
+ })
+
+
+#define SHUFFLE_PATTERN_16_A__ 0x00
+#define SHUFFLE_PATTERN_16_B__ 0x01
+#define SHUFFLE_PATTERN_16_C__ 0x02
+#define SHUFFLE_PATTERN_16_D__ 0x03
+#define SHUFFLE_PATTERN_16_E__ 0x04
+#define SHUFFLE_PATTERN_16_F__ 0x05
+#define SHUFFLE_PATTERN_16_G__ 0x06
+#define SHUFFLE_PATTERN_16_H__ 0x07
+#define SHUFFLE_PATTERN_16_I__ 0x08
+#define SHUFFLE_PATTERN_16_J__ 0x09
+#define SHUFFLE_PATTERN_16_K__ 0x0a
+#define SHUFFLE_PATTERN_16_L__ 0x0b
+#define SHUFFLE_PATTERN_16_M__ 0x0c
+#define SHUFFLE_PATTERN_16_N__ 0x0d
+#define SHUFFLE_PATTERN_16_O__ 0x0e
+#define SHUFFLE_PATTERN_16_P__ 0x0f
+#define SHUFFLE_PATTERN_16_a__ 0x10
+#define SHUFFLE_PATTERN_16_b__ 0x11
+#define SHUFFLE_PATTERN_16_c__ 0x12
+#define SHUFFLE_PATTERN_16_d__ 0x13
+#define SHUFFLE_PATTERN_16_e__ 0x14
+#define SHUFFLE_PATTERN_16_f__ 0x15
+#define SHUFFLE_PATTERN_16_g__ 0x16
+#define SHUFFLE_PATTERN_16_h__ 0x17
+#define SHUFFLE_PATTERN_16_i__ 0x18
+#define SHUFFLE_PATTERN_16_j__ 0x19
+#define SHUFFLE_PATTERN_16_k__ 0x1a
+#define SHUFFLE_PATTERN_16_l__ 0x1b
+#define SHUFFLE_PATTERN_16_m__ 0x1c
+#define SHUFFLE_PATTERN_16_n__ 0x1d
+#define SHUFFLE_PATTERN_16_o__ 0x1e
+#define SHUFFLE_PATTERN_16_p__ 0x1f
+#define SHUFFLE_PATTERN_16_X__ 0xc0
+#define SHUFFLE_PATTERN_16_x__ 0xc0
+#define SHUFFLE_PATTERN_16_0__ 0x80
+#define SHUFFLE_PATTERN_16_8__ 0xe0
+
+#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ SHUFFLE_PATTERN_16_##A##__, \
+ SHUFFLE_PATTERN_16_##B##__, \
+ SHUFFLE_PATTERN_16_##C##__, \
+ SHUFFLE_PATTERN_16_##D##__, \
+ SHUFFLE_PATTERN_16_##E##__, \
+ SHUFFLE_PATTERN_16_##F##__, \
+ SHUFFLE_PATTERN_16_##G##__, \
+ SHUFFLE_PATTERN_16_##H##__, \
+ SHUFFLE_PATTERN_16_##I##__, \
+ SHUFFLE_PATTERN_16_##J##__, \
+ SHUFFLE_PATTERN_16_##K##__, \
+ SHUFFLE_PATTERN_16_##L##__, \
+ SHUFFLE_PATTERN_16_##M##__, \
+ SHUFFLE_PATTERN_16_##N##__, \
+ SHUFFLE_PATTERN_16_##O##__, \
+ SHUFFLE_PATTERN_16_##P
+
+#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ ((const vector unsigned char){ \
+ SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ })
+
+#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ ((const qword){ \
+ SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
+ })
+
+#endif
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 22e51a86ae..322be1252e 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -35,6 +35,7 @@
#include "util/u_math.h"
#include "spu_colorpack.h"
#include "spu_main.h"
+#include "spu_shuffle.h"
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
@@ -76,8 +77,13 @@ struct vertex_header {
* Triangle edge info
*/
struct edge {
- float dx; /**< X(v1) - X(v0), used only during setup */
- float dy; /**< Y(v1) - Y(v0), used only during setup */
+ union {
+ struct {
+ float dx; /**< X(v1) - X(v0), used only during setup */
+ float dy; /**< Y(v1) - Y(v0), used only during setup */
+ };
+ vec_float4 ds; /**< vector accessor for dx and dy */
+ };
float dxdy; /**< dx/dy */
float sx, sy; /**< first sample point coord */
int lines; /**< number of lines on this edge */
@@ -102,10 +108,15 @@ struct setup_stage {
* turn. Currently fixed at 4 floats, but should change in time.
* Codegen will help cope with this.
*/
- const struct vertex_header *vmax;
- const struct vertex_header *vmid;
- const struct vertex_header *vmin;
- const struct vertex_header *vprovoke;
+ union {
+ struct {
+ const struct vertex_header *vmin;
+ const struct vertex_header *vmid;
+ const struct vertex_header *vmax;
+ const struct vertex_header *vprovoke;
+ };
+ qword vertex_headers;
+ };
struct edge ebot;
struct edge etop;
@@ -122,8 +133,7 @@ struct setup_stage {
struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
struct {
- int left[2]; /**< [0] = row0, [1] = row1 */
- int right[2];
+ vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
int y;
unsigned y_flags;
unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
@@ -306,52 +316,35 @@ block(int x)
/**
- * Compute mask which indicates which pixels in the 2x2 quad are actually inside
- * the triangle's bounds.
- * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
- */
-static INLINE mask_t
-calculate_mask(int x)
-{
- /* This is a little tricky.
- * Use & instead of && to avoid branches.
- * Use negation to convert true/false to ~0/0 values.
- */
- mask_t mask;
- mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
- mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
- mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
- mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
- return mask;
-}
-
-
-/**
* Render a horizontal span of quads
*/
static void
flush_spans(void)
{
int minleft, maxright;
- int x;
+
+ const int l0 = spu_extract(setup.span.quad, 0);
+ const int l1 = spu_extract(setup.span.quad, 1);
+ const int r0 = spu_extract(setup.span.quad, 2);
+ const int r1 = spu_extract(setup.span.quad, 3);
switch (setup.span.y_flags) {
case 0x3:
/* both odd and even lines written (both quad rows) */
- minleft = MIN2(setup.span.left[0], setup.span.left[1]);
- maxright = MAX2(setup.span.right[0], setup.span.right[1]);
+ minleft = MIN2(l0, l1);
+ maxright = MAX2(r0, r1);
break;
case 0x1:
/* only even line written (quad top row) */
- minleft = setup.span.left[0];
- maxright = setup.span.right[0];
+ minleft = l0;
+ maxright = r0;
break;
case 0x2:
/* only odd line written (quad bottom row) */
- minleft = setup.span.left[1];
- maxright = setup.span.right[1];
+ minleft = l1;
+ maxright = r1;
break;
default:
@@ -389,17 +382,42 @@ flush_spans(void)
ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
}
- /* XXX this loop could be moved into the above switch cases and
- * calculate_mask() could be simplified a bit...
- */
- for (x = block(minleft); x <= block(maxright); x += 2) {
- emit_quad( x, setup.span.y, calculate_mask( x ));
+ /* XXX this loop could be moved into the above switch cases... */
+
+ /* Setup for mask calculation */
+ const vec_int4 quad_LlRr = setup.span.quad;
+ const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
+ const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
+ const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
+
+ const vec_int4 twos = spu_splats(2);
+
+ const int x = block(minleft);
+ vec_int4 xs = {x, x+1, x, x+1};
+
+ for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
+ /**
+ * Computes mask to indicate which pixels in the 2x2 quad are actually
+ * inside the triangle's bounds.
+ */
+
+ /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
+ const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
+ const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
+
+ /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
+ const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
+
+ /* Combine results to create mask */
+ const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
+
+ emit_quad(spu_extract(xs, 0), setup.span.y, mask);
}
setup.span.y = 0;
setup.span.y_flags = 0;
- setup.span.right[0] = 0;
- setup.span.right[1] = 0;
+ /* Zero right elements */
+ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
}
@@ -444,55 +462,39 @@ setup_sort_vertices(const struct vertex_header *v0,
/* determine bottom to top order of vertices */
{
- float y0 = spu_extract(v0->data[0], 1);
- float y1 = spu_extract(v1->data[0], 1);
- float y2 = spu_extract(v2->data[0], 1);
- if (y0 <= y1) {
- if (y1 <= y2) {
- /* y0<=y1<=y2 */
- setup.vmin = v0;
- setup.vmid = v1;
- setup.vmax = v2;
- sign = -1.0f;
- }
- else if (y2 <= y0) {
- /* y2<=y0<=y1 */
- setup.vmin = v2;
- setup.vmid = v0;
- setup.vmax = v1;
- sign = -1.0f;
- }
- else {
- /* y0<=y2<=y1 */
- setup.vmin = v0;
- setup.vmid = v2;
- setup.vmax = v1;
- sign = 1.0f;
- }
- }
- else {
- if (y0 <= y2) {
- /* y1<=y0<=y2 */
- setup.vmin = v1;
- setup.vmid = v0;
- setup.vmax = v2;
- sign = 1.0f;
- }
- else if (y2 <= y1) {
- /* y2<=y1<=y0 */
- setup.vmin = v2;
- setup.vmid = v1;
- setup.vmax = v0;
- sign = 1.0f;
- }
- else {
- /* y1<=y2<=y0 */
- setup.vmin = v1;
- setup.vmid = v2;
- setup.vmax = v0;
- sign = -1.0f;
- }
- }
+ /* A table of shuffle patterns for putting vertex_header pointers into
+ correct order. Quite magical. */
+ const vec_uchar16 sort_order_patterns[] = {
+ SHUFFLE4(A,B,C,C),
+ SHUFFLE4(C,A,B,C),
+ SHUFFLE4(A,C,B,C),
+ SHUFFLE4(B,C,A,C),
+ SHUFFLE4(B,A,C,C),
+ SHUFFLE4(C,B,A,C) };
+
+ /* The vertex_header pointers, packed for easy shuffling later */
+ const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2};
+
+ /* Collate y values into two vectors for comparison.
+ Using only one shuffle constant! ;) */
+ const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C));
+ const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C));
+
+ /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
+ const vec_uint4 compare = spu_cmpgt(y_012, y_120);
+ /* Compress the result of the comparison into 4 bits */
+ const vec_uint4 gather = spu_gather(compare);
+ /* Subtract one to attain the index into the LUT. Magical. */
+ const unsigned int index = spu_extract(gather, 0) - 1;
+
+ /* Load the appropriate pattern and construct the desired vector. */
+ setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]);
+
+ /* Using the result of the comparison, set sign.
+ Very magical. */
+ sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
}
/* Check if triangle is completely outside the tile bounds */
@@ -509,12 +511,9 @@ setup_sort_vertices(const struct vertex_header *v0,
spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
return FALSE;
- setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
- setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
- setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
- setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
- setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
- setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
+ setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
+ setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
+ setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
/*
* Compute triangle's area. Use 1/area to compute partial
@@ -535,8 +534,6 @@ setup_sort_vertices(const struct vertex_header *v0,
setup.facing = (area * sign > 0.0f)
^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
- setup.vprovoke = v2;
-
return TRUE;
}
@@ -746,9 +743,11 @@ subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
setup.span.y = block(_y);
}
- setup.span.left[_y&1] = left;
- setup.span.right[_y&1] = right;
- setup.span.y_flags |= 1<<(_y&1);
+ int offset = _y&1;
+ vec_int4 quad_LlRr = {left, left, right, right};
+ /* Store left and right in 0 or 1 row of quad based on offset */
+ setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
+ setup.span.y_flags |= 1<<offset;
}
}
@@ -790,8 +789,8 @@ tri_draw(const float *v0, const float *v1, const float *v2,
setup.span.y = 0;
setup.span.y_flags = 0;
- setup.span.right[0] = 0;
- setup.span.right[1] = 0;
+ /* Zero right elements */
+ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
if (setup.oneOverArea < 0.0) {
/* emaj on left */
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index 4fda1ab64f..a8e97e7c30 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -197,9 +197,7 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render,
i915_render->fallback = 0;
return TRUE;
default:
- assert((int)"Error unkown primtive type" & 0);
- /* Actually, can handle a lot more just fine... Fixme.
- */
+ /* FIXME: Actually, can handle a lot more just fine... */
return FALSE;
}
}
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
index d40d75f264..2f974cf5c4 100644
--- a/src/gallium/drivers/nv30/nv30_query.c
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -50,7 +50,7 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
* the existing query to notify completion, but it could be better.
*/
if (q->object) {
- uint64 tmp;
+ uint64_t tmp;
pipe->get_query_result(pipe, pq, 1, &tmp);
}
@@ -80,7 +80,7 @@ nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
static boolean
nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
- boolean wait, uint64 *result)
+ boolean wait, uint64_t *result)
{
struct nv30_context *nv30 = nv30_context(pipe);
struct nv30_query *q = nv30_query(pq);
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
index 40fed621b2..9480695d6e 100644
--- a/src/gallium/drivers/nv30/nv30_state_emit.c
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -49,7 +49,7 @@ nv30_state_emit(struct nv30_context *nv30)
struct nv30_state *state = &nv30->state;
struct nv30_screen *screen = nv30->screen;
unsigned i, samplers;
- uint64 states;
+ uint64_t states;
if (nv30->pctx_id != screen->cur_pctx) {
for (i = 0; i < NV30_STATE_MAX; i++) {
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
index 57f39cfab0..9b9a43f49d 100644
--- a/src/gallium/drivers/nv40/nv40_query.c
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -50,7 +50,7 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
* the existing query to notify completion, but it could be better.
*/
if (q->object) {
- uint64 tmp;
+ uint64_t tmp;
pipe->get_query_result(pipe, pq, 1, &tmp);
}
@@ -80,7 +80,7 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
static boolean
nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq,
- boolean wait, uint64 *result)
+ boolean wait, uint64_t *result)
{
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_query *q = nv40_query(pq);
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index ab88dc416e..52ec4c044b 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -65,7 +65,7 @@ nv40_state_emit(struct nv40_context *nv40)
struct nv40_state *state = &nv40->state;
struct nv40_screen *screen = nv40->screen;
unsigned i, samplers;
- uint64 states;
+ uint64_t states;
if (nv40->pctx_id != screen->cur_pctx) {
for (i = 0; i < NV40_STATE_MAX; i++) {
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 5d377f2d06..0958bba334 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -70,6 +70,10 @@ struct nv50_rasterizer_stateobj {
struct nv50_miptree {
struct pipe_texture base;
struct pipe_buffer *buffer;
+
+ int *image_offset;
+ int image_nr;
+ int total_size;
};
static INLINE struct nv50_miptree *
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 28a8bdc0fa..2497371232 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -31,7 +31,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
{
struct pipe_winsys *ws = pscreen->winsys;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
- unsigned usage, pitch;
+ unsigned usage, width = pt->width[0], height = pt->height[0];
+ int i;
mt->base = *pt;
mt->base.refcount = 1;
@@ -47,11 +48,31 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
break;
}
- pitch = ((pt->width[0] + 63) & ~63) * pt->block.size;
- /*XXX*/
- pitch *= 2;
+ switch (pt->target) {
+ case PIPE_TEXTURE_3D:
+ mt->image_nr = pt->depth[0];
+ break;
+ case PIPE_TEXTURE_CUBE:
+ mt->image_nr = 6;
+ break;
+ default:
+ mt->image_nr = 1;
+ break;
+ }
+ mt->image_offset = CALLOC(mt->image_nr, sizeof(int));
- mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]);
+ for (i = 0; i < mt->image_nr; i++) {
+ int image_size;
+
+ image_size = align(width, 8) * pt->block.size;
+ image_size = align(image_size, 64);
+ image_size *= align(height, 8) * pt->block.size;
+
+ mt->image_offset[i] = mt->total_size;
+ mt->total_size += image_size;
+ }
+
+ mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size);
if (!mt->buffer) {
FREE(mt);
return NULL;
@@ -83,6 +104,15 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_surface *s;
struct pipe_surface *ps;
+ int img;
+
+ if (pt->target == PIPE_TEXTURE_CUBE)
+ img = face;
+ else
+ if (pt->target == PIPE_TEXTURE_3D)
+ img = zslice;
+ else
+ img = 0;
s = CALLOC_STRUCT(nv50_surface);
if (!s)
@@ -98,7 +128,7 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
ps->nblocksx = pt->nblocksx[level];
ps->nblocksy = pt->nblocksy[level];
ps->stride = ps->width * ps->block.size;
- ps->offset = 0;
+ ps->offset = mt->image_offset[img];
ps->usage = flags;
ps->status = PIPE_SURFACE_STATUS_DEFINED;
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index d6fbdd1824..d66e1d0949 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -179,6 +179,38 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r)
}
}
+static int
+alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
+{
+ int i;
+
+ if ((idx + 4) >= NV50_SU_MAX_TEMP)
+ return 1;
+
+ if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
+ pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
+ return alloc_temp4(pc, dst, idx + 1);
+
+ for (i = 0; i < 4; i++) {
+ dst[i] = CALLOC_STRUCT(nv50_reg);
+ dst[i]->type = P_TEMP;
+ dst[i]->index = -1;
+ dst[i]->hw = idx + i;
+ pc->r_temp[idx + i] = dst[i];
+ }
+
+ return 0;
+}
+
+static void
+free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ free_temp(pc, reg[i]);
+}
+
static struct nv50_reg *
temp_temp(struct nv50_pc *pc)
{
@@ -902,7 +934,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
{
const struct tgsi_full_instruction *inst = &tok->FullInstruction;
struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;
- unsigned mask, sat;
+ unsigned mask, sat, unit;
int i, c;
mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
@@ -916,8 +948,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+
+ if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
+ unit = fs->SrcRegister.Index;
+
for (c = 0; c < 4; c++)
- src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]);
+ src[i][c] = tgsi_src(pc, c, fs);
}
if (sat) {
@@ -1155,35 +1192,30 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_TEX:
- {
- struct nv50_reg *t0, *t1, *t2, *t3;
- struct nv50_program_exec *e;
+ case TGSI_OPCODE_TXP:
+ {
+ struct nv50_reg *t[4];
+ struct nv50_program_exec *e;
- t0 = alloc_temp(pc, NULL);
- t0 = alloc_temp(pc, NULL);
- t1 = alloc_temp(pc, NULL);
- t2 = alloc_temp(pc, NULL);
- t3 = alloc_temp(pc, NULL);
- emit_mov(pc, t0, src[0][0]);
- emit_mov(pc, t1, src[0][1]);
+ alloc_temp4(pc, t, 0);
+ emit_mov(pc, t[0], src[0][0]);
+ emit_mov(pc, t[1], src[0][1]);
- e = exec(pc);
- e->inst[0] = 0xf6400000;
- set_long(pc, e);
- e->inst[1] |= 0x0000c004;
- set_dst(pc, t0, e);
- emit(pc, e);
+ e = exec(pc);
+ e->inst[0] = 0xf6400000;
+ e->inst[0] |= (unit << 9);
+ set_long(pc, e);
+ e->inst[1] |= 0x0000c004;
+ set_dst(pc, t[0], e);
+ emit(pc, e);
- if (mask & (1 << 0)) emit_mov(pc, dst[0], t0);
- if (mask & (1 << 1)) emit_mov(pc, dst[1], t1);
- if (mask & (1 << 2)) emit_mov(pc, dst[2], t2);
- if (mask & (1 << 3)) emit_mov(pc, dst[3], t3);
+ if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
+ if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
+ if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
+ if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
- free_temp(pc, t0);
- free_temp(pc, t1);
- free_temp(pc, t2);
- free_temp(pc, t3);
- }
+ free_temp4(pc, t);
+ }
break;
case TGSI_OPCODE_XPD:
temp = alloc_temp(pc, NULL);
@@ -1570,8 +1602,13 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
if (!upload)
return;
+ NOUVEAU_ERR("-------\n");
up = ptr = MALLOC(p->exec_size * 4);
for (e = p->exec_head; e; e = e->next) {
+ NOUVEAU_ERR("0x%08x\n", e->inst[0]);
+ if (is_long(e))
+ NOUVEAU_ERR("0x%08x\n", e->inst[1]);
+
*(ptr++) = e->inst[0];
if (is_long(e))
*(ptr++) = e->inst[1];
@@ -1687,7 +1724,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
- struct pipe_winsys *ws = nv50->pipe.winsys;
+ struct pipe_screen *pscreen = nv50->pipe.screen;
while (p->exec_head) {
struct nv50_program_exec *e = p->exec_head;
@@ -1699,7 +1736,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
p->exec_size = 0;
if (p->buffer)
- pipe_buffer_reference(ws, &p->buffer, NULL);
+ pipe_buffer_reference(pscreen, &p->buffer, NULL);
nv50->screen->nvws->res_free(&p->data);
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 26bd90ccc5..777e77906d 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -51,7 +51,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *q)
static boolean
nv50_query_result(struct pipe_context *pipe, struct pipe_query *q,
- boolean wait, uint64 *result)
+ boolean wait, uint64_t *result)
{
NOUVEAU_ERR("unimplemented\n");
*result = 0xdeadcafe;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 7ab12a6d70..e2451c6ecb 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -94,7 +94,7 @@ struct softpipe_context {
/* Counter for occlusion queries. Note this supports overlapping
* queries.
*/
- uint64 occlusion_count;
+ uint64_t occlusion_count;
/*
* Mapped vertex buffers
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 40329a9562..5dacbbe55f 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -171,7 +171,6 @@ static void shade_destroy(struct quad_stage *qs)
struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
{
struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
- uint i;
/* allocate storage for program inputs/outputs, aligned to 16 bytes */
qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16);
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 2106ee1d23..b0d8e01426 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -37,8 +37,8 @@
#include "sp_query.h"
struct softpipe_query {
- uint64 start;
- uint64 end;
+ uint64_t start;
+ uint64_t end;
};
@@ -87,7 +87,7 @@ static boolean
softpipe_get_query_result(struct pipe_context *pipe,
struct pipe_query *q,
boolean wait,
- uint64 *result )
+ uint64_t *result )
{
struct softpipe_query *sq = softpipe_query(q);
*result = sq->end - sq->start;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 12f98c32f5..11b08b3a82 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -55,9 +55,9 @@ softpipe_get_param(struct pipe_screen *screen, int param)
{
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
- return 8;
+ return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return 8;
+ return PIPE_MAX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 0cb4b2f03c..a64dc89f43 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -94,31 +94,50 @@ softpipe_texture_layout(struct pipe_screen *screen,
return spt->buffer != NULL;
}
+/* Hack it up to use the old winsys->surface_alloc_storage()
+ * method for now:
+ */
static boolean
softpipe_displaytarget_layout(struct pipe_screen *screen,
struct softpipe_texture * spt)
{
struct pipe_winsys *ws = screen->winsys;
- size_t tex_size;
- unsigned cpp;
-
- switch (spt->base.format) {
- case PIPE_FORMAT_R5G6B5_UNORM:
- cpp = 2;
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- default:
- cpp = 4;
- break;
+ struct pipe_surface surf;
+ unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ |
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_GPU_READ |
+ PIPE_BUFFER_USAGE_GPU_WRITE);
+ int ret;
+
+
+ memset(&surf, 0, sizeof(surf));
+
+ ret =ws->surface_alloc_storage( ws,
+ &surf,
+ spt->base.width[0],
+ spt->base.height[0],
+ spt->base.format,
+ flags,
+ spt->base.tex_usage);
+ if(ret != 0)
+ return FALSE;
+
+ if (!surf.buffer) {
+ /* allocation failed */
+ return FALSE;
}
- tex_size = spt->base.width[0] * cpp * spt->base.height[0];
- spt->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, tex_size);
+
/* Now extract the goodies:
*/
spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);
spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);
- spt->stride[0] = spt->base.width[0] * cpp;
+ spt->stride[0] = surf.stride;
+
+ /* Transfer the reference:
+ */
+ spt->buffer = surf.buffer;
+ surf.buffer = NULL;
+
return spt->buffer != NULL;
}
@@ -220,10 +239,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
ps = CALLOC_STRUCT(pipe_surface);
ps->refcount = 1;
- ps->winsys = ws;
if (ps) {
assert(ps->refcount);
- assert(ps->winsys);
pipe_texture_reference(&ps->texture, pt);
pipe_buffer_reference(screen, &ps->buffer, spt->buffer);
ps->format = pt->format;
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 1dd7719379..f0d51ad82e 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -274,11 +274,11 @@ static INLINE boolean
trace_context_get_query_result(struct pipe_context *_pipe,
struct pipe_query *query,
boolean wait,
- uint64 *presult)
+ uint64_t *presult)
{
struct trace_context *tr_ctx = trace_context(_pipe);
struct pipe_context *pipe = tr_ctx->pipe;
- uint64 result;
+ uint64_t result;
boolean _result;
trace_dump_call_begin("pipe_context", "get_query_result");
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 7bcebd3d6b..bc2a0a7ef3 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -96,7 +96,6 @@ typedef int _Bool;
typedef unsigned int uint;
typedef unsigned char ubyte;
typedef unsigned short ushort;
-typedef uint64_t uint64;
#if 0
#define boolean bool
@@ -112,20 +111,22 @@ typedef unsigned char boolean;
/* Function inlining */
-#ifdef __cplusplus
-# define INLINE inline
-#elif defined(__GNUC__)
-# define INLINE __inline__
-#elif defined(_MSC_VER)
-# define INLINE __inline
-#elif defined(__ICL)
-# define INLINE __inline
-#elif defined(__INTEL_COMPILER)
-# define INLINE inline
-#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-# define INLINE __inline
-#else
-# define INLINE
+#ifndef INLINE
+# ifdef __cplusplus
+# define INLINE inline
+# elif defined(__GNUC__)
+# define INLINE __inline__
+# elif defined(_MSC_VER)
+# define INLINE __inline
+# elif defined(__ICL)
+# define INLINE __inline
+# elif defined(__INTEL_COMPILER)
+# define INLINE inline
+# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+# define INLINE __inline
+# else
+# define INLINE
+# endif
#endif
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 2646706ff2..166c6b6b7e 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -109,7 +109,7 @@ struct pipe_context {
boolean (*get_query_result)(struct pipe_context *pipe,
struct pipe_query *q,
boolean wait,
- uint64 *result);
+ uint64_t *result);
/*@}*/
/**
diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 1fdcec639f..7b8b64b592 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -245,7 +245,7 @@ struct st_context {
memcpy(map, vertices, size);
pipe_buffer_unmap(screen, vbuf);
- util_draw_vertex_buffer(pipe, vbuf, prim, num_verts, num_attribs);
+ util_draw_vertex_buffer(pipe, vbuf, 0, prim, num_verts, num_attribs);
error2:
pipe_buffer_reference(screen, &vbuf, NULL);
diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript
index fdcdde101a..e68d5db7f4 100644
--- a/src/gallium/winsys/gdi/SConscript
+++ b/src/gallium/winsys/gdi/SConscript
@@ -19,7 +19,7 @@ if env['platform'] == 'windows':
env.Append(LIBS = [
'gdi32',
- 'user32',ss
+ 'user32',
'kernel32',
])
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index e66ce48f2d..bd5aa10a20 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -320,7 +320,7 @@ gdi_softpipe_flush_frontbuffer(struct pipe_winsys *winsys,
memset(&bmi, 0, sizeof(BITMAPINFO));
bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
bmi.bmiHeader.biWidth = surface->stride / pf_get_size(surface->format);
- bmi.bmiHeader.biHeight= -surface->height;
+ bmi.bmiHeader.biHeight= -(long)surface->height;
bmi.bmiHeader.biPlanes = 1;
bmi.bmiHeader.biBitCount = pf_get_bits(surface->format);
bmi.bmiHeader.biCompression = BI_RGB;
@@ -337,8 +337,23 @@ gdi_softpipe_flush_frontbuffer(struct pipe_winsys *winsys,
}
-const struct stw_winsys stw_winsys = {
+static const struct stw_winsys stw_winsys = {
&gdi_softpipe_screen_create,
&gdi_softpipe_context_create,
&gdi_softpipe_flush_frontbuffer
};
+
+
+BOOL WINAPI
+DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
+{
+ switch (fdwReason) {
+ case DLL_PROCESS_ATTACH:
+ return st_init(&stw_winsys);
+
+ case DLL_PROCESS_DETACH:
+ st_cleanup();
+ break;
+ }
+ return TRUE;
+}