summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/i915simple
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/i915simple')
-rw-r--r--src/gallium/drivers/i915simple/i915_context.c4
-rw-r--r--src/gallium/drivers/i915simple/i915_context.h43
-rw-r--r--src/gallium/drivers/i915simple/i915_flush.c3
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc.h23
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_emit.c163
-rw-r--r--src/gallium/drivers/i915simple/i915_fpc_translate.c183
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_emit.c40
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c9
-rw-r--r--src/gallium/drivers/i915simple/i915_state.c38
-rw-r--r--src/gallium/drivers/i915simple/i915_state_derived.c104
-rw-r--r--src/gallium/drivers/i915simple/i915_state_emit.c39
11 files changed, 417 insertions, 232 deletions
diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c
index acfa349439..c3955bbd2d 100644
--- a/src/gallium/drivers/i915simple/i915_context.c
+++ b/src/gallium/drivers/i915simple/i915_context.c
@@ -298,10 +298,12 @@ struct pipe_context *i915_create( struct pipe_winsys *pipe_winsys,
i915_init_string_functions(i915);
i915_init_texture_functions(i915);
+ draw_install_aaline_stage(i915->draw, &i915->pipe);
+ draw_install_aapoint_stage(i915->draw, &i915->pipe);
+
i915->pci_id = pci_id;
i915->flags.is_i945 = is_i945;
-
i915->dirty = ~0;
i915->hardware_dirty = ~0;
diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h
index 2d876925b2..d32dded6bd 100644
--- a/src/gallium/drivers/i915simple/i915_context.h
+++ b/src/gallium/drivers/i915simple/i915_context.h
@@ -79,6 +79,40 @@
#define I915_MAX_CONSTANT 32
+/** See constant_flags[] below */
+#define I915_CONSTFLAG_USER 0x1f
+
+
+/**
+ * Subclass of pipe_shader_state
+ */
+struct i915_fragment_shader
+{
+ struct pipe_shader_state state;
+ uint *program;
+ uint program_len;
+
+ /**
+ * constants introduced during translation.
+ * These are placed at the end of the constant buffer and grow toward
+ * the beginning (eg: slot 31, 30 29, ...)
+ * User-provided constants start at 0.
+ * This allows both types of constants to co-exist (until there's too many)
+ * and doesn't require regenerating/changing the fragment program to
+ * shuffle constants around.
+ */
+ uint num_constants;
+ float constants[I915_MAX_CONSTANT][4];
+
+ /**
+ * Status of each constant
+ * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding
+ * slot of the user's constant buffer. (set by pipe->set_constant_buffer())
+ * Else, the bitmask indicates which components are occupied by immediates.
+ */
+ ubyte constant_flags[I915_MAX_CONSTANT];
+};
+
struct i915_cache_context;
@@ -93,11 +127,6 @@ struct i915_state
float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4];
/** number of constants passed in through a constant buffer */
uint num_user_constants[PIPE_SHADER_TYPES];
- /** user constants, plus extra constants from shader translation */
- uint num_constants[PIPE_SHADER_TYPES];
-
- uint *program;
- uint program_len;
/* texture sampler state */
unsigned sampler[I915_TEX_UNITS][3];
@@ -187,7 +216,8 @@ struct i915_context
const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS];
const struct i915_depth_stencil_state *depth_stencil;
const struct i915_rasterizer_state *rasterizer;
- const struct pipe_shader_state *fs;
+
+ struct i915_fragment_shader *fs;
struct pipe_blend_color blend_color;
struct pipe_clip_state clip;
@@ -233,6 +263,7 @@ struct i915_context
#define I915_NEW_TEXTURE 0x800
#define I915_NEW_CONSTANTS 0x1000
#define I915_NEW_VBO 0x2000
+#define I915_NEW_VS 0x4000
/* Driver's internally generated state flags:
diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c
index 3c2069b827..96a54281f1 100644
--- a/src/gallium/drivers/i915simple/i915_flush.c
+++ b/src/gallium/drivers/i915simple/i915_flush.c
@@ -31,6 +31,7 @@
#include "pipe/p_defines.h"
+#include "draw/draw_context.h"
#include "i915_context.h"
#include "i915_reg.h"
#include "i915_batch.h"
@@ -44,6 +45,8 @@ static void i915_flush( struct pipe_context *pipe,
{
struct i915_context *i915 = i915_context(pipe);
+ draw_flush(i915->draw);
+
/* Do we need to emit an MI_FLUSH command to flush the hardware
* caches?
*/
diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h
index 8c7b68aefb..250dfe6dbf 100644
--- a/src/gallium/drivers/i915simple/i915_fpc.h
+++ b/src/gallium/drivers/i915simple/i915_fpc.h
@@ -44,9 +44,16 @@
* Program translation state
*/
struct i915_fp_compile {
- const struct pipe_shader_state *shader;
+ struct i915_fragment_shader *shader; /* the shader we're compiling */
- struct vertex_info *vertex_info;
+ boolean used_constants[I915_MAX_CONSTANT];
+
+ /** maps TGSI immediate index to constant slot */
+ uint num_immediates;
+ uint immediates_map[I915_MAX_CONSTANT];
+ float immediates[I915_MAX_CONSTANT][4];
+
+ boolean first_instruction;
uint declarations[I915_PROGRAM_SIZE];
uint program[I915_PROGRAM_SIZE];
@@ -57,11 +64,6 @@ struct i915_fp_compile {
uint output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
uint output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
- /** points into the i915->current.constants array: */
- float (*constants)[4];
- uint num_constants;
- uint constant_flags[I915_MAX_CONSTANT]; /**< status of each constant */
-
uint *csr; /**< Cursor, points into program. */
uint *decl; /**< Cursor, points into declarations. */
@@ -155,7 +157,9 @@ swizzle(int reg, uint x, uint y, uint z, uint w)
/***********************************************************************
* Public interface for the compiler
*/
-extern void i915_translate_fragment_program( struct i915_context *i915 );
+extern void
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs);
@@ -206,8 +210,5 @@ extern void i915_disassemble_program(const uint * program, uint sz);
extern void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
-extern void
-i915_translate_fragment_program(struct i915_context *i915);
-
#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c
index 74924ff0a1..4bdeefb449 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_emit.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c
@@ -61,8 +61,6 @@
(REG_NR_MASK << UREG_NR_SHIFT))
-#define I915_CONSTFLAG_PARAM 0x1f
-
uint
i915_get_temp(struct i915_fp_compile *p)
{
@@ -73,10 +71,21 @@ i915_get_temp(struct i915_fp_compile *p)
}
p->temp_flag |= 1 << (bit - 1);
- return UREG(REG_TYPE_R, (bit - 1));
+ return bit - 1;
+}
+
+
+static void
+i915_release_temp(struct i915_fp_compile *p, int reg)
+{
+ p->temp_flag &= ~(1 << reg);
}
+/**
+ * Get unpreserved temporary, a temp whose value is not preserved between
+ * PS program phases.
+ */
uint
i915_get_utemp(struct i915_fp_compile * p)
{
@@ -185,41 +194,62 @@ i915_emit_arith(struct i915_fp_compile * p,
return dest;
}
+
+/**
+ * Emit a texture load or texkill instruction.
+ * \param dest the dest i915 register
+ * \param destmask the dest register writemask
+ * \param sampler the i915 sampler register
+ * \param coord the i915 source texcoord operand
+ * \param opcode the instruction opcode
+ */
uint i915_emit_texld( struct i915_fp_compile *p,
uint dest,
uint destmask,
uint sampler,
uint coord,
- uint op )
+ uint opcode )
{
- uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+ int temp = -1;
+
if (coord != k) {
- /* No real way to work around this in the general case - need to
- * allocate and declare a new temporary register (a utemp won't
- * do). Will fallback for now.
+ /* texcoord is swizzled or negated. Need to allocate a new temporary
+ * register (a utemp / unpreserved temp) won't do.
*/
- i915_program_error(p, "Can't (yet) swizzle TEX arguments");
- assert(0);
- return 0;
+ uint tempReg;
+
+ temp = i915_get_temp(p); /* get temp reg index */
+ tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
+
+ i915_emit_arith( p, A0_MOV,
+ tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
+ 0, /* saturate */
+ coord, 0, 0 ); /* src0, src1, src2 */
+
+ /* new src texcoord is tempReg */
+ coord = tempReg;
}
/* Don't worry about saturate as we only support
*/
if (destmask != A0_DEST_CHANNEL_ALL) {
+ /* if not writing to XYZW... */
uint tmp = i915_get_utemp(p);
- i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+ i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode );
i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
- return dest;
+ /* XXX release utemp here? */
}
else {
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+ /* is the sampler coord a texcoord input reg? */
if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
p->nr_tex_indirect++;
}
- *(p->csr++) = (op |
+ *(p->csr++) = (opcode |
T0_DEST( dest ) |
T0_SAMPLER( sampler ));
@@ -227,14 +257,19 @@ uint i915_emit_texld( struct i915_fp_compile *p,
*(p->csr++) = T2_MBZ;
p->nr_tex_insn++;
- return dest;
}
+
+ if (temp >= 0)
+ i915_release_temp(p, temp);
+
+ return dest;
}
uint
i915_emit_const1f(struct i915_fp_compile * p, float c0)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
@@ -243,15 +278,15 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 4; idx++) {
- if (!(p->constant_flags[reg] & (1 << idx)) ||
- p->constants[reg][idx] == c0) {
- p->constants[reg][idx] = c0;
- p->constant_flags[reg] |= 1 << idx;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ if (!(ifs->constant_flags[reg] & (1 << idx)) ||
+ ifs->constants[reg][idx] == c0) {
+ ifs->constants[reg][idx] = c0;
+ ifs->constant_flags[reg] |= 1 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
}
}
@@ -264,6 +299,7 @@ i915_emit_const1f(struct i915_fp_compile * p, float c0)
uint
i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
@@ -277,16 +313,16 @@ i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == 0xf ||
- p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+ if (ifs->constant_flags[reg] == 0xf ||
+ ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 3; idx++) {
- if (!(p->constant_flags[reg] & (3 << idx))) {
- p->constants[reg][idx + 0] = c0;
- p->constants[reg][idx + 1] = c1;
- p->constant_flags[reg] |= 3 << idx;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ if (!(ifs->constant_flags[reg] & (3 << idx))) {
+ ifs->constants[reg][idx + 0] = c0;
+ ifs->constants[reg][idx + 1] = c1;
+ ifs->constant_flags[reg] |= 3 << idx;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
}
}
@@ -302,25 +338,26 @@ uint
i915_emit_const4f(struct i915_fp_compile * p,
float c0, float c1, float c2, float c3)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned reg;
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
- if (p->constant_flags[reg] == 0xf &&
- p->constants[reg][0] == c0 &&
- p->constants[reg][1] == c1 &&
- p->constants[reg][2] == c2 &&
- p->constants[reg][3] == c3) {
+ if (ifs->constant_flags[reg] == 0xf &&
+ ifs->constants[reg][0] == c0 &&
+ ifs->constants[reg][1] == c1 &&
+ ifs->constants[reg][2] == c2 &&
+ ifs->constants[reg][3] == c3) {
return UREG(REG_TYPE_CONST, reg);
}
- else if (p->constant_flags[reg] == 0) {
-
- p->constants[reg][0] = c0;
- p->constants[reg][1] = c1;
- p->constants[reg][2] = c2;
- p->constants[reg][3] = c3;
- p->constant_flags[reg] = 0xf;
- if (reg + 1 > p->num_constants)
- p->num_constants = reg + 1;
+ else if (ifs->constant_flags[reg] == 0) {
+
+ ifs->constants[reg][0] = c0;
+ ifs->constants[reg][1] = c1;
+ ifs->constants[reg][2] = c2;
+ ifs->constants[reg][3] = c3;
+ ifs->constant_flags[reg] = 0xf;
+ if (reg + 1 > ifs->num_constants)
+ ifs->num_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);
}
}
@@ -335,41 +372,3 @@ i915_emit_const4fv(struct i915_fp_compile * p, const float * c)
{
return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
}
-
-
-#if 00000/*UNUSED*/
-/* Reserve a slot in the constant file for a Mesa state parameter.
- * These will later need to be tracked on statechanges, but that is
- * done elsewhere.
- */
-uint
-i915_emit_param4fv(struct i915_fp_compile * p, const float * values)
-{
- struct i915_fragment_program *fp = p->fp;
- int i;
-
- for (i = 0; i < fp->nr_params; i++) {
- if (fp->param[i].values == values)
- return UREG(REG_TYPE_CONST, fp->param[i].reg);
- }
-
- if (p->constants->nr_constants == I915_MAX_CONSTANT ||
- fp->nr_params == I915_MAX_CONSTANT) {
- i915_program_error(p, "i915_emit_param4fv: out of constants\n");
- return 0;
- }
-
- {
- int reg = p->constants->nr_constants++;
- int i = fp->nr_params++;
-
- assert (p->constant_flags[reg] == 0);
- p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
-
- fp->param[i].values = values;
- fp->param[i].reg = reg;
-
- return UREG(REG_TYPE_CONST, reg);
- }
-}
-#endif
diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c
index 6c1524c768..76a2184e9a 100644
--- a/src/gallium/drivers/i915simple/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c
@@ -34,6 +34,7 @@
#include "pipe/p_shader_tokens.h"
#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_dump.h"
#include "draw/draw_vertex.h"
@@ -97,19 +98,19 @@ negate(int reg, int x, int y, int z, int w)
}
+/**
+ * In the event of a translation failure, we'll generate a simple color
+ * pass-through program.
+ */
static void
-i915_use_passthrough_shader(struct i915_context *i915)
+i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
- debug_printf("**** Using i915 pass-through fragment shader\n");
-
- i915->current.program = (uint *) MALLOC(sizeof(passthrough));
- if (i915->current.program) {
- memcpy(i915->current.program, passthrough, sizeof(passthrough));
- i915->current.program_len = Elements(passthrough);
+ fs->program = (uint *) MALLOC(sizeof(passthrough));
+ if (fs->program) {
+ memcpy(fs->program, passthrough, sizeof(passthrough));
+ fs->program_len = Elements(passthrough);
}
-
- i915->current.num_constants[PIPE_SHADER_FRAGMENT] = 0;
- i915->current.num_user_constants[PIPE_SHADER_FRAGMENT] = 0;
+ fs->num_constants = 0;
}
@@ -161,9 +162,6 @@ src_vector(struct i915_fp_compile *p,
* We also use a texture coordinate to pass wpos when possible.
*/
- /* use vertex format info to map a slot number to a VF attrib */
- assert(index < p->vertex_info->num_attribs);
-
sem_name = p->input_semantic_name[index];
sem_ind = p->input_semantic_index[index];
@@ -201,7 +199,8 @@ src_vector(struct i915_fp_compile *p,
break;
case TGSI_FILE_IMMEDIATE:
- /* XXX unfinished - need to append immediates onto const buffer */
+ assert(index < p->num_immediates);
+ index = p->immediates_map[index];
/* fall-through */
case TGSI_FILE_CONSTANT:
src = UREG(REG_TYPE_CONST, index);
@@ -386,6 +385,26 @@ emit_simple_arith(struct i915_fp_compile *p,
arg3 );
}
+
+/** As above, but swap the first two src regs */
+static void
+emit_simple_arith_swap2(struct i915_fp_compile *p,
+ const struct tgsi_full_instruction *inst,
+ uint opcode, uint numArgs)
+{
+ struct tgsi_full_instruction inst2;
+
+ assert(numArgs == 2);
+
+ /* transpose first two registers */
+ inst2 = *inst;
+ inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
+ inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+
+ emit_simple_arith(p, &inst2, opcode, numArgs);
+}
+
+
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
@@ -556,8 +575,12 @@ i915_translate_instruction(struct i915_fp_compile *p,
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
tmp = i915_get_utemp(p);
- i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
- 0, src0, T0_TEXKILL);
+ i915_emit_texld(p,
+ tmp, /* dest reg: a dummy reg */
+ A0_DEST_CHANNEL_ALL, /* dest writemask */
+ 0, /* sampler */
+ src0, /* coord*/
+ T0_TEXKILL); /* opcode */
break;
case TGSI_OPCODE_LG2:
@@ -773,6 +796,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_SGE, 2);
break;
+ case TGSI_OPCODE_SLE:
+ /* like SGE, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SGE, 2);
+ break;
+
case TGSI_OPCODE_SIN:
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
tmp = i915_get_utemp(p);
@@ -827,6 +855,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith(p, inst, A0_SLT, 2);
break;
+ case TGSI_OPCODE_SGT:
+ /* like SLT, but swap reg0, reg1 */
+ emit_simple_arith_swap2(p, inst, A0_SLT, 2);
+ break;
+
case TGSI_OPCODE_SUB:
src0 = src_vector(p, &inst->FullSrcRegisters[0]);
src1 = src_vector(p, &inst->FullSrcRegisters[1]);
@@ -880,6 +913,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
default:
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
+ p->error = 1;
return;
}
@@ -896,6 +930,7 @@ static void
i915_translate_instructions(struct i915_fp_compile *p,
const struct tgsi_token *tokens)
{
+ struct i915_fragment_shader *ifs = p->shader;
struct tgsi_parse_context parse;
tgsi_parse_init( &parse, tokens );
@@ -928,13 +963,64 @@ i915_translate_instructions(struct i915_fp_compile *p,
p->output_semantic_name[ind] = sem;
p->output_semantic_index[ind] = semi;
}
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_CONSTANT) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+ i++) {
+ assert(ifs->constant_flags[i] == 0x0);
+ ifs->constant_flags[i] = I915_CONSTFLAG_USER;
+ ifs->num_constants = MAX2(ifs->num_constants, i + 1);
+ }
+ }
+ else if (parse.FullToken.FullDeclaration.Declaration.File
+ == TGSI_FILE_TEMPORARY) {
+ uint i;
+ for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+ i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+ i++) {
+ assert(i < I915_MAX_TEMPORARY);
+ p->temp_flag |= (1 << i); /* mark temp as used */
+ }
+ }
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX append the immediate to the const buffer... */
+ {
+ const struct tgsi_full_immediate *imm
+ = &parse.FullToken.FullImmediate;
+ const uint pos = p->num_immediates++;
+ uint j;
+ for (j = 0; j < imm->Immediate.Size; j++) {
+ p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ }
+ }
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if (p->first_instruction) {
+ /* resolve location of immediates */
+ uint i, j;
+ for (i = 0; i < p->num_immediates; i++) {
+ /* find constant slot for this immediate */
+ for (j = 0; j < I915_MAX_CONSTANT; j++) {
+ if (ifs->constant_flags[j] == 0x0) {
+ memcpy(ifs->constants[j],
+ p->immediates[i],
+ 4 * sizeof(float));
+ /*printf("immediate %d maps to const %d\n", i, j);*/
+ ifs->constant_flags[j] = 0xf; /* all four comps used */
+ p->immediates_map[i] = j;
+ ifs->num_constants = MAX2(ifs->num_constants, j + 1);
+ break;
+ }
+ }
+ }
+
+ p->first_instruction = FALSE;
+ }
+
i915_translate_instruction(p, &parse.FullToken.FullInstruction);
break;
@@ -950,32 +1036,33 @@ i915_translate_instructions(struct i915_fp_compile *p,
static struct i915_fp_compile *
i915_init_compile(struct i915_context *i915,
- const struct pipe_shader_state *fs)
+ struct i915_fragment_shader *ifs)
{
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
- p->shader = i915->fs;
+ p->shader = ifs;
- p->vertex_info = &i915->current.vertex_info;
-
- /* new constants found during translation get appended after the
- * user-provided constants.
+ /* Put new constants at end of const buffer, growing downward.
+ * The problem is we don't know how many user-defined constants might
+ * be specified with pipe->set_constant_buffer().
+ * Should pre-scan the user's program to determine the highest-numbered
+ * constant referenced.
*/
- p->constants = i915->current.constants[PIPE_SHADER_FRAGMENT];
- p->num_constants = i915->current.num_user_constants[PIPE_SHADER_FRAGMENT];
+ ifs->num_constants = 0;
+ memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
+
+ p->first_instruction = TRUE;
p->nr_tex_indirect = 1; /* correct? */
p->nr_tex_insn = 0;
p->nr_alu_insn = 0;
p->nr_decl_insn = 0;
- memset(p->constant_flags, 0, sizeof(p->constant_flags));
-
p->csr = p->program;
p->decl = p->declarations;
p->decl_s = 0;
p->decl_t = 0;
- p->temp_flag = 0xffff000;
+ p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
p->utemp_flag = ~0x7;
p->wpos_tex = -1;
@@ -993,6 +1080,7 @@ i915_init_compile(struct i915_context *i915,
static void
i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
{
+ struct i915_fragment_shader *ifs = p->shader;
unsigned long program_size = (unsigned long) (p->csr - p->program);
unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
@@ -1008,19 +1096,13 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
if (p->nr_decl_insn > I915_MAX_DECL_INSN)
i915_program_error(p, "Exceeded max DECL instructions");
- /* free old program, if present */
- if (i915->current.program) {
- FREE(i915->current.program);
- i915->current.program_len = 0;
- }
-
if (p->error) {
p->NumNativeInstructions = 0;
p->NumNativeAluInstructions = 0;
p->NumNativeTexInstructions = 0;
p->NumNativeTexIndirections = 0;
- i915_use_passthrough_shader(i915);
+ i915_use_passthrough_shader(ifs);
}
else {
p->NumNativeInstructions
@@ -1034,24 +1116,20 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
/* Copy compilation results to fragment program struct:
*/
- i915->current.program
+ assert(!ifs->program);
+ ifs->program
= (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
- if (i915->current.program) {
- i915->current.program_len = program_size + decl_size;
+ if (ifs->program) {
+ ifs->program_len = program_size + decl_size;
- memcpy(i915->current.program,
+ memcpy(ifs->program,
p->declarations,
decl_size * sizeof(uint));
- memcpy(i915->current.program + decl_size,
+ memcpy(ifs->program + decl_size,
p->program,
program_size * sizeof(uint));
}
-
- /* update number of constants */
- i915->current.num_constants[PIPE_SHADER_FRAGMENT] = p->num_constants;
- assert(i915->current.num_constants[PIPE_SHADER_FRAGMENT]
- >= i915->current.num_user_constants[PIPE_SHADER_FRAGMENT]);
}
/* Release the compilation struct:
@@ -1085,7 +1163,7 @@ i915_find_wpos_space(struct i915_fp_compile *p)
i915_program_error(p, "No free texcoord for wpos value");
}
#else
- if (p->shader->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ if (p->shader->state.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
/* frag shader using the fragment position input */
#if 0
assert(0);
@@ -1106,7 +1184,7 @@ static void
i915_fixup_depth_write(struct i915_fp_compile *p)
{
/* XXX assuming pos/depth is always in output[0] */
- if (p->shader->output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+ if (p->shader->state.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
const uint depth = UREG(REG_TYPE_OD, 0);
i915_emit_arith(p,
@@ -1121,13 +1199,18 @@ i915_fixup_depth_write(struct i915_fp_compile *p)
void
-i915_translate_fragment_program( struct i915_context *i915 )
+i915_translate_fragment_program( struct i915_context *i915,
+ struct i915_fragment_shader *fs)
{
- struct i915_fp_compile *p = i915_init_compile(i915, i915->fs);
- const struct tgsi_token *tokens = i915->fs->tokens;
+ struct i915_fp_compile *p = i915_init_compile(i915, fs);
+ const struct tgsi_token *tokens = fs->state.tokens;
i915_find_wpos_space(p);
+#if 0
+ tgsi_dump(tokens, 0);
+#endif
+
i915_translate_instructions(p, tokens);
i915_fixup_depth_write(p);
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
index 44c4325936..d8de5178f6 100644
--- a/src/gallium/drivers/i915simple/i915_prim_emit.c
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -72,38 +72,42 @@ emit_hw_vertex( struct i915_context *i915,
uint i;
uint count = 0; /* for debug/sanity */
+ assert(!i915->dirty);
+
for (i = 0; i < vinfo->num_attribs; i++) {
+ const uint j = vinfo->src_index[i];
+ const float *attrib = vertex->data[j];
switch (vinfo->emit[i]) {
case EMIT_OMIT:
/* no-op */
break;
case EMIT_1F:
- OUT_BATCH( fui(vertex->data[i][0]) );
+ OUT_BATCH( fui(attrib[0]) );
count++;
break;
case EMIT_2F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
count += 2;
break;
case EMIT_3F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
- OUT_BATCH( fui(vertex->data[i][2]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
count += 3;
break;
case EMIT_4F:
- OUT_BATCH( fui(vertex->data[i][0]) );
- OUT_BATCH( fui(vertex->data[i][1]) );
- OUT_BATCH( fui(vertex->data[i][2]) );
- OUT_BATCH( fui(vertex->data[i][3]) );
+ OUT_BATCH( fui(attrib[0]) );
+ OUT_BATCH( fui(attrib[1]) );
+ OUT_BATCH( fui(attrib[2]) );
+ OUT_BATCH( fui(attrib[3]) );
count += 4;
break;
case EMIT_4UB:
- OUT_BATCH( pack_ub4(float_to_ubyte( vertex->data[i][2] ),
- float_to_ubyte( vertex->data[i][1] ),
- float_to_ubyte( vertex->data[i][0] ),
- float_to_ubyte( vertex->data[i][3] )) );
+ OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ),
+ float_to_ubyte( attrib[1] ),
+ float_to_ubyte( attrib[0] ),
+ float_to_ubyte( attrib[3] )) );
count += 1;
break;
default:
@@ -122,17 +126,19 @@ emit_prim( struct draw_stage *stage,
unsigned nr )
{
struct i915_context *i915 = setup_stage(stage)->i915;
- unsigned vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ unsigned vertex_size;
unsigned i;
- assert(vertex_size >= 12); /* never smaller than 12 bytes */
-
if (i915->dirty)
i915_update_derived( i915 );
if (i915->hardware_dirty)
i915_emit_hardware_state( i915 );
+ /* need to do this after validation! */
+ vertex_size = i915->current.vertex_info.size * 4; /* in bytes */
+ assert(vertex_size >= 12); /* never smaller than 12 bytes */
+
if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) {
FLUSH_BATCH();
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index c5bf6174f6..9d5f609220 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -83,6 +83,12 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render )
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
+
+ if (i915->dirty) {
+ /* make sure we have up to date vertex layout */
+ i915_update_derived( i915 );
+ }
+
return &i915->current.vertex_info;
}
@@ -143,7 +149,8 @@ i915_vbuf_render_draw( struct vbuf_render *render,
assert(nr_indices);
- assert((i915->dirty & ~I915_NEW_VBO) == 0);
+ /* this seems to be bogus, since we validate state right after this */
+ /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/
if (i915->dirty)
i915_update_derived( i915 );
diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index e055eed7e0..a35bdf941f 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -38,6 +38,7 @@
#include "i915_reg.h"
#include "i915_state.h"
#include "i915_state_inlines.h"
+#include "i915_fpc.h"
/* The i915 (and related graphics cores) do not support GL_CLAMP. The
@@ -416,26 +417,47 @@ static void i915_set_polygon_stipple( struct pipe_context *pipe,
}
-static void * i915_create_fs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *templ)
+
+static void *
+i915_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
{
- return 0;
+ struct i915_context *i915 = i915_context(pipe);
+ struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader);
+ if (!ifs)
+ return NULL;
+
+ ifs->state = *templ;
+
+ /* The shader's compiled to i915 instructions here */
+ i915_translate_fragment_program(i915, ifs);
+
+ return ifs;
}
-static void i915_bind_fs_state(struct pipe_context *pipe, void *fs)
+static void
+i915_bind_fs_state(struct pipe_context *pipe, void *shader)
{
struct i915_context *i915 = i915_context(pipe);
- i915->fs = (struct pipe_shader_state *)fs;
+ i915->fs = (struct i915_fragment_shader*) shader;
i915->dirty |= I915_NEW_FS;
}
-static void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
+static
+void i915_delete_fs_state(struct pipe_context *pipe, void *shader)
{
- /*do nothing*/
+ struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader;
+
+ if (ifs->program)
+ FREE(ifs->program);
+ ifs->program_len = 0;
+
+ FREE(ifs);
}
+
static void *
i915_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@@ -452,6 +474,8 @@ static void i915_bind_vs_state(struct pipe_context *pipe, void *shader)
/* just pass-through to draw module */
draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader);
+
+ i915->dirty |= I915_NEW_VS;
}
static void i915_delete_vs_state(struct pipe_context *pipe, void *shader)
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
index 4767584fc6..5cf70acdf3 100644
--- a/src/gallium/drivers/i915simple/i915_state_derived.c
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -27,104 +27,111 @@
#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
#include "draw/draw_context.h"
#include "draw/draw_vertex.h"
#include "i915_context.h"
#include "i915_state.h"
#include "i915_reg.h"
#include "i915_fpc.h"
-#include "pipe/p_shader_tokens.h"
+
/**
- * Determine which post-transform / pre-rasterization vertex attributes
- * we need.
- * Derived from: fs, setup states.
+ * Determine the hardware vertex layout.
+ * Depends on vertex/fragment shader state.
*/
static void calculate_vertex_layout( struct i915_context *i915 )
{
- const struct pipe_shader_state *fs = i915->fs;
+ const struct pipe_shader_state *fs = &i915->fs->state;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
- uint front0 = 0, back0 = 0, front1 = 0, back1 = 0;
- boolean needW = 0;
+ boolean texCoords[8], colors[2], fog, needW;
uint i;
- boolean texCoords[8];
- uint src = 0;
+ int src;
memset(texCoords, 0, sizeof(texCoords));
+ colors[0] = colors[1] = fog = needW = FALSE;
memset(&vinfo, 0, sizeof(vinfo));
- /* pos */
- draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src++);
- /* Note: we'll set the S4_VFMT_XYZ[W] bits below */
-
+ /* Determine which fragment program inputs are needed. Setup HW vertex
+ * layout below, in the HW-specific attribute order.
+ */
for (i = 0; i < fs->num_inputs; i++) {
switch (fs->input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
break;
case TGSI_SEMANTIC_COLOR:
- if (fs->input_semantic_index[i] == 0) {
- front0 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++);
- vinfo.hwfmt[0] |= S4_VFMT_COLOR;
- }
- else {
- assert(fs->input_semantic_index[i] == 1);
- front1 = draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src++);
- vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
- }
+ assert(fs->input_semantic_index[i] < 2);
+ colors[fs->input_semantic_index[i]] = TRUE;
break;
case TGSI_SEMANTIC_GENERIC:
/* usually a texcoord */
{
const uint unit = fs->input_semantic_index[i];
- uint hwtc;
+ assert(unit < 8);
texCoords[unit] = TRUE;
- draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++);
- hwtc = TEXCOORDFMT_4D;
needW = TRUE;
- vinfo.hwfmt[1] |= hwtc << (unit * 4);
}
break;
case TGSI_SEMANTIC_FOG:
- debug_printf("i915 fogcoord not implemented yet\n");
- draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++);
+ fog = TRUE;
break;
default:
assert(0);
}
-
}
- /* finish up texcoord fields */
- for (i = 0; i < 8; i++) {
- if (!texCoords[i]) {
- const uint hwtc = TEXCOORDFMT_NOT_PRESENT;
- vinfo.hwfmt[1] |= hwtc << (i* 4);
- }
- }
-
- /* go back and fill in the vertex position info now that we have needW */
+
+ /* pos */
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
if (needW) {
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
vinfo.emit[0] = EMIT_4F;
}
else {
+ draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
vinfo.emit[0] = EMIT_3F;
}
- /* Additional attributes required for setup: Just twosided
- * lighting. Edgeflag is dealt with specially by setting bits in
- * the vertex header.
- */
- if (i915->rasterizer->light_twoside) {
- if (front0) {
- back0 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++);
+ /* hardware point size */
+ /* XXX todo */
+
+ /* primary color */
+ if (colors[0]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_COLOR;
+ }
+
+ /* secondary color */
+ if (colors[1]) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+ draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
+ vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
+ }
+
+ /* fog coord, not fog blend factor */
+ if (fog) {
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+ draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+ vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
+ }
+
+ /* texcoords */
+ for (i = 0; i < 8; i++) {
+ uint hwtc;
+ if (texCoords[i]) {
+ hwtc = TEXCOORDFMT_4D;
+ src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+ draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
- if (back0) {
- back1 = draw_emit_vertex_attr(&vinfo, EMIT_OMIT, colorInterp, src++);
+ else {
+ hwtc = TEXCOORDFMT_NOT_PRESENT;
}
+ vinfo.hwfmt[1] |= hwtc << (i * 4);
}
draw_compute_vertex_size(&vinfo);
@@ -148,7 +155,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
*/
void i915_update_derived( struct i915_context *i915 )
{
- if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS))
+ if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS))
calculate_vertex_layout( i915 );
if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE))
@@ -164,7 +171,6 @@ void i915_update_derived( struct i915_context *i915 )
i915_update_dynamic( i915 );
if (i915->dirty & I915_NEW_FS) {
- i915_translate_fragment_program(i915);
i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */
}
diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c
index 3339287f49..6bbaac4e34 100644
--- a/src/gallium/drivers/i915simple/i915_state_emit.c
+++ b/src/gallium/drivers/i915simple/i915_state_emit.c
@@ -99,7 +99,11 @@ i915_emit_hardware_state(struct i915_context *i915 )
2 + I915_TEX_UNITS*3 +
2 + I915_TEX_UNITS*3 +
2 + I915_MAX_CONSTANT*4 +
+#if 0
i915->current.program_len +
+#else
+ i915->fs->program_len +
+#endif
6
) * 3/2; /* plus 50% margin */
const unsigned relocs = ( I915_TEX_UNITS +
@@ -325,15 +329,34 @@ i915_emit_hardware_state(struct i915_context *i915 )
/* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */
if (i915->hardware_dirty & I915_HW_PROGRAM)
{
- const uint nr = i915->current.num_constants[PIPE_SHADER_FRAGMENT];
- assert(nr <= I915_MAX_CONSTANT);
- if (nr > 0) {
- const uint *c
- = (const uint *) i915->current.constants[PIPE_SHADER_FRAGMENT];
+ /* Collate the user-defined constants with the fragment shader's
+ * immediates according to the constant_flags[] array.
+ */
+ const uint nr = i915->fs->num_constants;
+ if (nr) {
uint i;
+
OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) );
OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) );
+
for (i = 0; i < nr; i++) {
+ const uint *c;
+ if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
+ /* grab user-defined constant */
+ c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i];
+ }
+ else {
+ /* emit program constant */
+ c = (uint *) i915->fs->constants[i];
+ }
+#if 0 /* debug */
+ {
+ float *f = (float *) c;
+ printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
+ (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
+ ? "user" : "immediate"));
+ }
+#endif
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
@@ -348,9 +371,9 @@ i915_emit_hardware_state(struct i915_context *i915 )
{
uint i;
/* we should always have, at least, a pass-through program */
- assert(i915->current.program_len > 0);
- for (i = 0; i < i915->current.program_len; i++) {
- OUT_BATCH(i915->current.program[i]);
+ assert(i915->fs->program_len > 0);
+ for (i = 0; i < i915->fs->program_len; i++) {
+ OUT_BATCH(i915->fs->program[i]);
}
}