diff options
16 files changed, 1524 insertions, 1009 deletions
| diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 0d8dcb9f87..d83888d90a 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -13,6 +13,9 @@ C_SOURCES = \  		radeon_opcodes.c \  		radeon_program_alu.c \  		radeon_program_pair.c \ +		radeon_pair_translate.c \ +		radeon_pair_schedule.c \ +		radeon_pair_regalloc.c \  		radeon_dataflow.c \  		radeon_dataflow_deadcode.c \  		radeon_dataflow_swizzles.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 2dad3dba4a..375838d98e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -56,7 +56,6 @@ struct r300_emit_state {  };  #define PROG_CODE \ -	struct r300_emit_state * emit = (struct r300_emit_state*)data; \  	struct r300_fragment_program_compiler *c = emit->compiler; \  	struct r300_fragment_program_code *code = &c->code->code.r300 @@ -75,6 +74,18 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int  		code->pixsize = index;  } +static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ +	if (src.File == RC_FILE_CONSTANT) { +		return src.Index | (1 << 5); +	} else if (src.File == RC_FILE_TEMPORARY) { +		use_temporary(code, src.Index); +		return src.Index; +	} + +	return 0; +} +  static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)  { @@ -120,7 +131,7 @@ static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler  /**   * Emit one paired ALU instruction.   
*/ -static int emit_alu(void* data, struct radeon_pair_instruction* inst) +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)  {  	PROG_CODE; @@ -136,14 +147,10 @@ static int emit_alu(void* data, struct radeon_pair_instruction* inst)  	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);  	for(j = 0; j < 3; ++j) { -		unsigned int src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); -		if (!inst->RGB.Src[j].Constant) -			use_temporary(code, inst->RGB.Src[j].Index); +		unsigned int src = use_source(code, inst->RGB.Src[j]);  		code->alu.inst[ip].rgb_addr |= src << (6*j); -		src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); -		if (!inst->Alpha.Src[j].Constant) -			use_temporary(code, inst->Alpha.Src[j].Index); +		src = use_source(code, inst->Alpha.Src[j]);  		code->alu.inst[ip].alpha_addr |= src << (6*j);  		unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); @@ -203,7 +210,7 @@ static int finish_node(struct r300_emit_state * emit)  	if (code->alu.length == emit->node_first_alu) {  		/* Generate a single NOP for this node */ -		struct radeon_pair_instruction inst; +		struct rc_pair_instruction inst;  		memset(&inst, 0, sizeof(inst));  		if (!emit_alu(emit, &inst))  			return 0; @@ -248,7 +255,7 @@ static int finish_node(struct r300_emit_state * emit)   * Begin a block of texture instructions.   * Create the necessary indirection.   
*/ -static int begin_tex(void* data) +static int begin_tex(struct r300_emit_state * emit)  {  	PROG_CODE; @@ -273,7 +280,7 @@ static int begin_tex(void* data)  } -static int emit_tex(void* data, struct radeon_pair_texture_instruction* inst) +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)  {  	PROG_CODE; @@ -282,31 +289,31 @@ static int emit_tex(void* data, struct radeon_pair_texture_instruction* inst)  		return 0;  	} -	unsigned int unit = inst->TexSrcUnit; -	unsigned int dest = inst->DestIndex; +	unsigned int unit = inst->U.I.TexSrcUnit; +	unsigned int dest = inst->U.I.DstReg.Index;  	unsigned int opcode; -	switch(inst->Opcode) { -	case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; -	case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; -	case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; -	case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; +	switch(inst->U.I.Opcode) { +	case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; +	case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; +	case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; +	case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;  	default: -		error("Unknown texture opcode %i", inst->Opcode); +		error("Unknown texture opcode %i", inst->U.I.Opcode);  		return 0;  	} -	if (inst->Opcode == RADEON_OPCODE_KIL) { +	if (inst->U.I.Opcode == RC_OPCODE_KIL) {  		unit = 0;  		dest = 0;  	} else {  		use_temporary(code, dest);  	} -	use_temporary(code, inst->SrcIndex); +	use_temporary(code, inst->U.I.SrcReg[0].Index);  	code->tex.inst[code->tex.length++] = -		(inst->SrcIndex << R300_SRC_ADDR_SHIFT) | +		(inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |  		(dest << R300_DST_ADDR_SHIFT) |  		(unit << R300_TEX_ID_SHIFT) |  		(opcode << R300_TEX_INST_SHIFT); @@ -314,13 +321,6 @@ static int emit_tex(void* data, struct radeon_pair_texture_instruction* inst)  } -static const struct radeon_pair_handler pair_handler = { -	.EmitPaired = &emit_alu, -	.EmitTex = &emit_tex, -	
.BeginTexBlock = &begin_tex, -	.MaxHwTemps = R300_PFS_NUM_TEMP_REGS -}; -  /**   * Final compilation step: Turn the intermediate radeon_program into   * machine-readable instructions. @@ -335,7 +335,24 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi  	memset(code, 0, sizeof(struct r300_fragment_program_code)); -	radeonPairProgram(compiler, &pair_handler, &emit); +	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; +	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; +	    inst = inst->Next) { +		if (inst->Type == RC_INSTRUCTION_NORMAL) { +			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { +				begin_tex(&emit); +				continue; +			} + +			emit_tex(&emit, inst); +		} else { +			emit_alu(&emit, &inst->U.P); +		} +	} + +	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS) +		rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); +  	if (compiler->Base.Error)  		return; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 746e4495fe..5581f25352 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -114,6 +114,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  	}  	rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); +	if (c->Base.Error) +		return;  	if (c->Base.Debug) {  		fprintf(stderr, "Fragment Program: After deadcode:\n"); @@ -122,6 +124,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  	}  	rc_dataflow_swizzles(&c->Base); +	if (c->Base.Error) +		return;  	if (c->Base.Debug) {  		fprintf(stderr, "Compiler: after dataflow passes:\n"); @@ -129,6 +133,40 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)  		fflush(stderr);  	} +	rc_pair_translate(c); +	if (c->Base.Error) +		return; + +	if (c->Base.Debug) { +		fprintf(stderr, "Compiler: 
after pair translate:\n"); +		rc_print_program(&c->Base.Program); +		fflush(stderr); +	} + +	rc_pair_schedule(c); +	if (c->Base.Error) +		return; + +	if (c->Base.Debug) { +		fprintf(stderr, "Compiler: after pair scheduling:\n"); +		rc_print_program(&c->Base.Program); +		fflush(stderr); +	} + +	if (c->is_r500) +		rc_pair_regalloc(c, 128); +	else +		rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS); + +	if (c->Base.Error) +		return; + +	if (c->Base.Debug) { +		fprintf(stderr, "Compiler: after pair register allocation:\n"); +		rc_print_program(&c->Base.Program); +		fflush(stderr); +	} +  	if (c->is_r500) {  		r500BuildFragmentProgramHwCode(c);  	} else { diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 2f0c0d5283..8f618d88ad 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -37,10 +37,6 @@   *   * \author Corbin Simpson <MostAwesomeDude@gmail.com>   * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - *   */  #include "r500_fragprog.h" @@ -51,7 +47,6 @@  #define PROG_CODE \ -	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \  	struct r500_fragment_program_code *code = &c->code->code.r500  #define error(fmt, args...) 
do {			\ @@ -114,7 +109,7 @@ static unsigned int fix_hw_swizzle(unsigned int swz)  	return swz;  } -static unsigned int translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)  {  	unsigned int t = inst->RGB.Arg[arg].Source;  	int comp; @@ -127,7 +122,7 @@ static unsigned int translate_arg_rgb(struct radeon_pair_instruction *inst, int  	return t;  } -static unsigned int translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)  {  	unsigned int t = inst->Alpha.Arg[i].Source;  	t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; @@ -144,16 +139,21 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int  static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)  { -	if (!src.Constant) +	if (src.File == RC_FILE_CONSTANT) { +		return src.Index | 0x100; +	} else if (src.File == RC_FILE_TEMPORARY) {  		use_temporary(code, src.Index); -	return src.Index | src.Constant << 8; +		return src.Index; +	} + +	return 0;  }  /**   * Emit a paired ALU instruction.   
*/ -static int emit_paired(void *data, struct radeon_pair_instruction *inst) +static int emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)  {  	PROG_CODE; @@ -221,7 +221,7 @@ static unsigned int translate_strq_swizzle(unsigned int swizzle)  /**   * Emit a single TEX instruction   */ -static int emit_tex(void *data, struct radeon_pair_texture_instruction *inst) +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)  {  	PROG_CODE; @@ -233,46 +233,44 @@ static int emit_tex(void *data, struct radeon_pair_texture_instruction *inst)  	int ip = ++code->inst_end;  	code->inst[ip].inst0 = R500_INST_TYPE_TEX -		| (inst->WriteMask << 11) +		| (inst->DstReg.WriteMask << 11)  		| R500_INST_TEX_SEM_WAIT;  	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)  		| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;  	if (inst->TexSrcTarget == RC_TEXTURE_RECT) -	        code->inst[ip].inst1 |= R500_TEX_UNSCALED; +		code->inst[ip].inst1 |= R500_TEX_UNSCALED;  	switch (inst->Opcode) { -	case RADEON_OPCODE_KIL: +	case RC_OPCODE_KIL:  		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;  		break; -	case RADEON_OPCODE_TEX: +	case RC_OPCODE_TEX:  		code->inst[ip].inst1 |= R500_TEX_INST_LD;  		break; -	case RADEON_OPCODE_TXB: +	case RC_OPCODE_TXB:  		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;  		break; -	case RADEON_OPCODE_TXP: +	case RC_OPCODE_TXP:  		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;  		break;  	default:  		error("emit_tex can't handle opcode %x\n", inst->Opcode);  	} -	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) -		| (translate_strq_swizzle(inst->SrcSwizzle) << 8) -		| R500_TEX_DST_ADDR(inst->DestIndex) +	use_temporary(code, inst->SrcReg[0].Index); +	if (inst->Opcode != RC_OPCODE_KIL) +		use_temporary(code, inst->DstReg.Index); + +	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) +		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) +		| 
R500_TEX_DST_ADDR(inst->DstReg.Index)  		| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G  		| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;  	return 1;  } -static const struct radeon_pair_handler pair_handler = { -	.EmitPaired = emit_paired, -	.EmitTex = emit_tex, -	.MaxHwTemps = 128 -}; -  void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)  {  	struct r500_fragment_program_code *code = &compiler->code->code.r500; @@ -281,7 +279,22 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi  	code->max_temp_idx = 1;  	code->inst_end = -1; -	radeonPairProgram(compiler, &pair_handler, compiler); +	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; +	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; +	    inst = inst->Next) { +		if (inst->Type == RC_INSTRUCTION_NORMAL) { +			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) +				continue; + +			emit_tex(compiler, &inst->U.I); +		} else { +			emit_paired(compiler, &inst->U.P); +		} +	} + +	if (code->max_temp_idx >= 128) +		rc_error(&compiler->Base, "Too many hardware temporaries used"); +  	if (compiler->Base.Error)  		return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index a171d8ab54..58dcb20d29 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -27,6 +27,136 @@  #include "radeon_dataflow.h" -#include "radeon_compiler.h" +#include "radeon_program.h" +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ +	struct rc_sub_instruction * inst = &fullinst->U.I; +	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + +	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { +		unsigned int refmask = 0; + +		if (inst->SrcReg[src].File == RC_FILE_NONE) +			return; + +		for(unsigned int chan = 0; chan 
< 4; ++chan) +			refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan); + +		refmask &= ~RC_MASK_XYZW; + +		for(unsigned int chan = 0; chan < 4; ++chan) { +			if (GET_BIT(refmask, chan)) { +				cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); +			} +		} + +		if (refmask && inst->SrcReg[src].RelAddr) +			cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); +	} +} + +static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_fn cb, void * userdata) +{ +	struct rc_pair_instruction * inst = &fullinst->U.P; +	unsigned int refmasks[3] = { 0, 0, 0 }; + +	if (inst->RGB.Opcode != RC_OPCODE_NOP) { +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + +		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { +			for(unsigned int chan = 0; chan < 3; ++chan) { +				unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); +				if (swz < 4) +					refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz; +			} +		} +	} + +	if (inst->Alpha.Opcode != RC_OPCODE_NOP) { +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + +		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { +			if (inst->Alpha.Arg[arg].Swizzle < 4) +				refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle; +		} +	} + +	for(unsigned int src = 0; src < 3; ++src) { +		if (inst->RGB.Src[src].Used) { +			for(unsigned int chan = 0; chan < 3; ++chan) { +				if (GET_BIT(refmasks[src], chan)) +					cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); +			} +		} + +		if (inst->Alpha.Src[src].Used) { +			if (GET_BIT(refmasks[src], 3)) +				cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); +		} +	} +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. 
+ */ +void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +{ +	if (inst->Type == RC_INSTRUCTION_NORMAL) { +		reads_normal(inst, cb, userdata); +	} else { +		reads_pair(inst, cb, userdata); +	} +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ +	struct rc_sub_instruction * inst = &fullinst->U.I; +	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + +	if (opcode->HasDstReg) { +		for(unsigned int chan = 0; chan < 4; ++chan) { +			if (GET_BIT(inst->DstReg.WriteMask, chan)) +				cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); +		} +	} + +	if (inst->WriteALUResult) +		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ +	struct rc_pair_instruction * inst = &fullinst->U.P; + +	for(unsigned int chan = 0; chan < 3; ++chan) { +		if (GET_BIT(inst->RGB.WriteMask, chan)) +			cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); +	} + +	if (inst->Alpha.WriteMask) +		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + +	if (inst->WriteALUResult) +		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! 
+ */ +void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +{ +	if (inst->Type == RC_INSTRUCTION_NORMAL) { +		writes_normal(inst, cb, userdata); +	} else { +		writes_pair(inst, cb, userdata); +	} +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 76c323d057..5aa4cb64f3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,6 +36,17 @@ struct rc_swizzle_caps;  /** + * Help analyze the register accesses of instructions. + */ +/*@{*/ +typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, +			rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +/*@}*/ + + +/**   * Compiler passes based on dataflow analysis.   
*/  /*@{*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index e097a62b55..a5072b5e1e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -363,6 +363,10 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {  		.Opcode = RC_OPCODE_REPL_ALPHA,  		.Name = "REPL_ALPHA",  		.HasDstReg = 1 +	}, +	{ +		.Opcode = RC_OPCODE_BEGIN_TEX, +		.Name = "BEGIN_TEX"  	}  }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index f8ba5255ca..c9c5b9f80f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -183,6 +183,11 @@ typedef enum {  	 * across all other channels */  	RC_OPCODE_REPL_ALPHA, +	/** special instruction, used in R300-R500 fragment programs +	 * to indicate the start of a block of texture instructions that +	 * can run simultaneously. */ +	RC_OPCODE_BEGIN_TEX, +  	MAX_RC_OPCODE  } rc_opcode; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 0000000000..e39ac2f510 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,349 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct live_intervals { +	int Start; +	int End; +	struct live_intervals * Next; +}; + +struct register_info { +	struct live_intervals Live; + +	unsigned int Used:1; +	unsigned int Allocated:1; +	rc_register_file File:3; +	unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct hardware_register { +	struct live_intervals * Used; +}; + +struct regalloc_state { +	struct radeon_compiler * C; + +	struct register_info Input[RC_REGISTER_MAX_INDEX]; +	struct register_info Temporary[RC_REGISTER_MAX_INDEX]; + +	struct hardware_register * HwTemporary; +	unsigned int NumHwTemporaries; +}; + +static void print_live_intervals(struct live_intervals * src) +{ +	if (!src) { +		DBG("(null)"); +		return; +	} + +	while(src) { +		DBG("(%i,%i)", src->Start, src->End); +		src = src->Next; +	} +} + +static void add_live_intervals(struct regalloc_state * s, +		struct live_intervals ** dst, struct live_intervals * src) +{ +	struct live_intervals ** dst_backup = dst; + +	if (VERBOSE) { +		DBG("add_live_intervals: "); +		print_live_intervals(*dst); +		DBG(" to "); +		print_live_intervals(src); +		DBG("\n"); +	} + +	while(src) { +		if (*dst && (*dst)->End < src->Start) { +			dst = &(*dst)->Next; +		} else if (!*dst || (*dst)->Start > src->End) { +			struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); +			li->Start = src->Start; +			li->End = src->End; +			li->Next = *dst; +			*dst = li; +			src = src->Next; +		} else { +			if (src->End > (*dst)->End) +				(*dst)->End = src->End; +			if (src->Start < (*dst)->Start) +				(*dst)->Start = src->Start; +			src = src->Next; +		} +	} + +	if (VERBOSE) { +		DBG("    result: "); +		print_live_intervals(*dst_backup); +		DBG("\n"); +	} +} + +static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) +{ +	if (VERBOSE) { +		DBG("overlap_live_intervals: "); +		print_live_intervals(dst); +		DBG(" to "); +		print_live_intervals(src); +		
DBG("\n"); +	} + +	while(src && dst) { +		if (dst->End <= src->Start) { +			dst = dst->Next; +		} else if (dst->End <= src->End) { +			DBG("    overlap\n"); +			return 1; +		} else if (dst->Start < src->End) { +			DBG("    overlap\n"); +			return 1; +		} else { +			src = src->Next; +		} +	} + +	DBG("    no overlap\n"); + +	return 0; +} + +static int try_add_live_intervals(struct regalloc_state * s, +		struct live_intervals ** dst, struct live_intervals * src) +{ +	if (overlap_live_intervals(*dst, src)) +		return 0; + +	add_live_intervals(s, dst, src); +	return 1; +} + +static void scan_callback(void * data, struct rc_instruction * inst, +		rc_register_file file, unsigned int index, unsigned int chan) +{ +	struct regalloc_state * s = data; +	struct register_info * reg; + +	if (file == RC_FILE_TEMPORARY) +		reg = &s->Temporary[index]; +	else if (file == RC_FILE_INPUT) +		reg = &s->Input[index]; +	else +		return; + +	if (!reg->Used) { +		reg->Used = 1; +		if (file == RC_FILE_INPUT) +			reg->Live.Start = -1; +		else +			reg->Live.Start = inst->IP; +	} else { +		if (inst->IP > reg->Live.End) +			reg->Live.End = inst->IP; +	} +} + +static void compute_live_intervals(struct regalloc_state * s) +{ +	rc_recompute_ips(s->C); + +	for(struct rc_instruction * inst = s->C->Program.Instructions.Next; +	    inst != &s->C->Program.Instructions; +	    inst = inst->Next) { +		rc_for_all_reads(inst, scan_callback, s); +		rc_for_all_writes(inst, scan_callback, s); +	} +} + +static void rewrite_register(struct regalloc_state * s, +		rc_register_file * file, unsigned int * index) +{ +	const struct register_info * reg; + +	if (*file == RC_FILE_TEMPORARY) +		reg = &s->Temporary[*index]; +	else if (*file == RC_FILE_INPUT) +		reg = &s->Input[*index]; +	else +		return; + +	if (reg->Allocated) { +		*file = reg->File; +		*index = reg->Index; +	} +} + +static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst) +{ +	const struct rc_opcode_info * opcode = 
rc_get_opcode_info(inst->Opcode); + +	if (opcode->HasDstReg) { +		rc_register_file file = inst->DstReg.File; +		unsigned int index = inst->DstReg.Index; + +		rewrite_register(s, &file, &index); + +		inst->DstReg.File = file; +		inst->DstReg.Index = index; +	} + +	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { +		rc_register_file file = inst->SrcReg[src].File; +		unsigned int index = inst->SrcReg[src].Index; + +		rewrite_register(s, &file, &index); + +		inst->SrcReg[src].File = file; +		inst->SrcReg[src].Index = index; +	} +} + +static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst) +{ +	if (inst->RGB.WriteMask) { +		rc_register_file file = RC_FILE_TEMPORARY; +		unsigned int index = inst->RGB.DestIndex; + +		rewrite_register(s, &file, &index); + +		inst->RGB.DestIndex = index; +	} + +	if (inst->Alpha.WriteMask) { +		rc_register_file file = RC_FILE_TEMPORARY; +		unsigned int index = inst->Alpha.DestIndex; + +		rewrite_register(s, &file, &index); + +		inst->Alpha.DestIndex = index; +	} + +	for(unsigned int src = 0; src < 3; ++src) { +		if (inst->RGB.Src[src].Used) { +			rc_register_file file = inst->RGB.Src[src].File; +			unsigned int index = inst->RGB.Src[src].Index; + +			rewrite_register(s, &file, &index); + +			inst->RGB.Src[src].File = file; +			inst->RGB.Src[src].Index = index; +		} + +		if (inst->Alpha.Src[src].Used) { +			rc_register_file file = inst->Alpha.Src[src].File; +			unsigned int index = inst->Alpha.Src[src].Index; + +			rewrite_register(s, &file, &index); + +			inst->Alpha.Src[src].File = file; +			inst->Alpha.Src[src].Index = index; +		} +	} +} + +static void do_regalloc(struct regalloc_state * s) +{ +	/* Simple and stupid greedy register allocation */ +	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { +		struct register_info * reg = &s->Temporary[index]; + +		if (!reg->Used) +			continue; + +		for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { +			if 
(try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) { +				reg->Allocated = 1; +				reg->File = RC_FILE_TEMPORARY; +				reg->Index = hwreg; +				goto success; +			} +		} + +		rc_error(s->C, "Ran out of hardware temporaries\n"); +		return; + +	success:; +	} + +	/* Rewrite all instructions based on the translation table we built */ +	for(struct rc_instruction * inst = s->C->Program.Instructions.Next; +	    inst != &s->C->Program.Instructions; +	    inst = inst->Next) { +		if (inst->Type == RC_INSTRUCTION_NORMAL) +			rewrite_normal_instruction(s, &inst->U.I); +		else +			rewrite_pair_instruction(s, &inst->U.P); +	} +} + +static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +{ +	struct regalloc_state * s = data; + +	if (!s->Input[input].Used) +		return; + +	add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); + +	s->Input[input].Allocated = 1; +	s->Input[input].File = RC_FILE_TEMPORARY; +	s->Input[input].Index = hwreg; + +} + +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps) +{ +	struct regalloc_state s; + +	memset(&s, 0, sizeof(s)); +	s.C = &c->Base; +	s.NumHwTemporaries = maxtemps; +	s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register)); +	memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register)); + +	compute_live_intervals(&s); + +	c->AllocateHwInputs(c, &alloc_input, &s); + +	do_regalloc(&s); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 0000000000..8a4b5ac8a9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,479 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { +	struct rc_instruction * Instruction; + +	/** Next instruction in the linked list of ready instructions. */ +	struct schedule_instruction *NextReady; + +	/** Values that this instruction reads and writes */ +	struct reg_value * WriteValues[4]; +	struct reg_value * ReadValues[12]; +	unsigned int NumWriteValues:3; +	unsigned int NumReadValues:4; + +	/** +	 * Number of (read and write) dependencies that must be resolved before +	 * this instruction can be scheduled. +	 */ +	unsigned int NumDependencies:5; +}; + + +/** + * Used to keep track of which instructions read a value. 
+ */ +struct reg_value_reader { +	struct schedule_instruction *Reader; +	struct reg_value_reader *Next; +}; + +/** + * Used to keep track of which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { +	struct schedule_instruction * Writer; + +	/** +	 * Unordered linked list of instructions that read from this value. +	 * When this value becomes available, we decrease all readers' +	 * dependency count. +	 */ +	struct reg_value_reader *Readers; + +	/** +	 * Number of readers of this value. This is decremented each time +	 * a reader of the value is committed. +	 * When the reader count reaches zero, the dependency count +	 * of the instruction writing \ref Next is decremented. +	 */ +	unsigned int NumReaders; + +	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { +	struct reg_value * Values[4]; +}; + +struct schedule_state { +	struct radeon_compiler * C; +	struct schedule_instruction * Current; + +	struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + +	/** +	 * Linked lists of instructions that can be scheduled right now, +	 * based on which ALU/TEX resources they require. 
+	 */ +	/*@{*/ +	struct schedule_instruction *ReadyFullALU; +	struct schedule_instruction *ReadyRGB; +	struct schedule_instruction *ReadyAlpha; +	struct schedule_instruction *ReadyTEX; +	/*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, +		rc_register_file file, unsigned int index, unsigned int chan) +{ +	if (file != RC_FILE_TEMPORARY) +		return 0; + +	if (index >= RC_REGISTER_MAX_INDEX) { +		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); +		return 0; +	} + +	return &s->Temporary[index].Values[chan]; +} + +static struct reg_value * get_reg_value(struct schedule_state * s, +		rc_register_file file, unsigned int index, unsigned int chan) +{ +	struct reg_value ** pv = get_reg_valuep(s, file, index, chan); +	if (!pv) +		return 0; +	return *pv; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ +	inst->NextReady = *list; +	*list = inst; +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ +	DBG("%i is now ready\n", sinst->Instruction->IP); + +	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) +		add_inst_to_list(&s->ReadyTEX, sinst); +	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) +		add_inst_to_list(&s->ReadyRGB, sinst); +	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) +		add_inst_to_list(&s->ReadyAlpha, sinst); +	else +		add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ +	assert(sinst->NumDependencies > 0); +	sinst->NumDependencies--; +	if (!sinst->NumDependencies) +		instruction_ready(s, sinst); +} + +static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ +	DBG("%i: commit\n", sinst->Instruction->IP); + +	for(unsigned int i = 0; i < sinst->NumReadValues; ++i) { +		struct reg_value * v = sinst->ReadValues[i]; +		
assert(v->NumReaders > 0); +		v->NumReaders--; +		if (!v->NumReaders) { +			if (v->Next) +				decrease_dependencies(s, v->Next->Writer); +		} +	} + +	for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) { +		struct reg_value * v = sinst->WriteValues[i]; +		for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { +			decrease_dependencies(s, r->Reader); +		} +	} +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ +	struct schedule_instruction *readytex; + +	assert(s->ReadyTEX); + +	/* Don't let the ready list change under us! */ +	readytex = s->ReadyTEX; +	s->ReadyTEX = 0; + +	/* Node marker for R300 */ +	struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev); +	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + +	/* Link texture instructions back in */ +	while(readytex) { +		struct schedule_instruction * tex = readytex; +		readytex = readytex->NextReady; + +		rc_insert_instruction(before->Prev, tex->Instruction); +		commit_instruction(s, tex); +	} +} + + +static int destructive_merge_instructions( +		struct rc_pair_instruction * rgb, +		struct rc_pair_instruction * alpha) +{ +	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); +	assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + +	/* Copy alpha args into rgb */ +	const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + +	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { +		unsigned int srcrgb = 0; +		unsigned int srcalpha = 0; +		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; +		rc_register_file file = 0; +		unsigned int index = 0; + +		if (alpha->Alpha.Arg[arg].Swizzle < 3) { +			srcrgb = 1; +			file = alpha->RGB.Src[oldsrc].File; +			index = alpha->RGB.Src[oldsrc].Index; +		} else if (alpha->Alpha.Arg[arg].Swizzle < 
4) { +			srcalpha = 1; +			file = alpha->Alpha.Src[oldsrc].File; +			index = alpha->Alpha.Src[oldsrc].Index; +		} + +		int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); +		if (source < 0) +			return 0; + +		rgb->Alpha.Arg[arg].Source = source; +		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; +		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; +		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; +	} + +	/* Copy alpha opcode into rgb */ +	rgb->Alpha.Opcode = alpha->Alpha.Opcode; +	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; +	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; +	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; +	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; +	rgb->Alpha.Saturate = alpha->Alpha.Saturate; + +	/* Merge ALU result writing */ +	if (alpha->WriteALUResult) { +		if (rgb->WriteALUResult) +			return 0; + +		rgb->WriteALUResult = alpha->WriteALUResult; +		rgb->ALUResultCompare = alpha->ALUResultCompare; +	} + +	return 1; +} + +/** + * Try to merge the given instructions into the rgb instruction. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ +	struct rc_pair_instruction backup; + +	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + +	if (destructive_merge_instructions(rgb, alpha)) +		return 1; + +	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); +	return 0; +} + + +/** + * Find a good ALU instruction or pair of ALU instructions and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. 
+ */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ +	struct schedule_instruction * sinst; + +	if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { +		if (s->ReadyFullALU) { +			sinst = s->ReadyFullALU; +			s->ReadyFullALU = s->ReadyFullALU->NextReady; +		} else if (s->ReadyRGB) { +			sinst = s->ReadyRGB; +			s->ReadyRGB = s->ReadyRGB->NextReady; +		} else { +			sinst = s->ReadyAlpha; +			s->ReadyAlpha = s->ReadyAlpha->NextReady; +		} + +		rc_insert_instruction(before->Prev, sinst->Instruction); +		commit_instruction(s, sinst); +	} else { +		struct schedule_instruction **prgb; +		struct schedule_instruction **palpha; + +		/* Some pairings might fail because they require too +		 * many source slots; try all possible pairings if necessary */ +		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { +			for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { +				struct schedule_instruction * psirgb = *prgb; +				struct schedule_instruction * psialpha = *palpha; + +				if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) +					continue; + +				*prgb = (*prgb)->NextReady; +				*palpha = (*palpha)->NextReady; +				rc_insert_instruction(before->Prev, psirgb->Instruction); +				commit_instruction(s, psirgb); +				commit_instruction(s, psialpha); +				goto success; +			} +		} + +		/* No success in pairing; just take the first RGB instruction */ +		sinst = s->ReadyRGB; +		s->ReadyRGB = s->ReadyRGB->NextReady; + +		rc_insert_instruction(before->Prev, sinst->Instruction); +		commit_instruction(s, sinst); +	success: ; +	} +} + +static void scan_read(void * data, struct rc_instruction * inst, +		rc_register_file file, unsigned int index, unsigned int chan) +{ +	struct schedule_state * s = data; +	struct reg_value * v = get_reg_value(s, file, index, chan); + +	if (!v) +		return; + +	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + +	struct 
reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); +	reader->Reader = s->Current; +	reader->Next = v->Readers; +	v->Readers = reader; +	v->NumReaders++; + +	s->Current->NumDependencies++; + +	if (s->Current->NumReadValues >= 12) { +		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); +	} else { +		s->Current->ReadValues[s->Current->NumReadValues++] = v; +	} +} + +static void scan_write(void * data, struct rc_instruction * inst, +		rc_register_file file, unsigned int index, unsigned int chan) +{ +	struct schedule_state * s = data; +	struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + +	if (!pv) +		return; + +	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + +	struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); +	memset(newv, 0, sizeof(*newv)); + +	newv->Writer = s->Current; + +	if (*pv) { +		(*pv)->Next = newv; +		s->Current->NumDependencies++; +	} + +	*pv = newv; + +	if (s->Current->NumWriteValues >= 4) { +		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); +	} else { +		s->Current->WriteValues[s->Current->NumWriteValues++] = newv; +	} +} + +static void schedule_block(struct r300_fragment_program_compiler * c, +		struct rc_instruction * begin, struct rc_instruction * end) +{ +	struct schedule_state s; + +	memset(&s, 0, sizeof(s)); +	s.C = &c->Base; + +	/* Scan instructions for data dependencies */ +	unsigned int ip = 0; +	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { +		s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); +		memset(s.Current, 0, sizeof(struct schedule_instruction)); + +		s.Current->Instruction = inst; +		inst->IP = ip++; + +		DBG("%i: Scanning\n", inst->IP); + +		rc_for_all_reads(inst, &scan_read, &s); +		rc_for_all_writes(inst, &scan_write, &s); + +		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + +		if (!s.Current->NumDependencies) +			instruction_ready(&s, 
s.Current); +	} + +	/* Temporarily unlink all instructions */ +	begin->Prev->Next = end; +	end->Prev = begin->Prev; + +	/* Schedule instructions back */ +	while(!s.C->Error && +	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { +		if (s.ReadyTEX) +			emit_all_tex(&s, end); + +		while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) +			emit_one_alu(&s, end); +	} +} + +static int is_controlflow(struct rc_instruction * inst) +{ +	if (inst->Type == RC_INSTRUCTION_NORMAL) { +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); +		return opcode->IsControlFlow; +	} +	return 0; +} + +void rc_pair_schedule(struct r300_fragment_program_compiler *c) +{ +	struct rc_instruction * inst = c->Base.Program.Instructions.Next; +	while(inst != &c->Base.Program.Instructions) { +		if (is_controlflow(inst)) { +			inst = inst->Next; +			continue; +		} + +		struct rc_instruction * first = inst; + +		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) +			inst = inst->Next; + +		DBG("Schedule one block\n"); +		schedule_block(c, first, inst); +	} +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c new file mode 100644 index 0000000000..c31891a62f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ +	struct rc_src_register tmp; + +	switch(inst->Opcode) { +	case RC_OPCODE_ADD: +		inst->SrcReg[2] = inst->SrcReg[1]; +		inst->SrcReg[1].File = RC_FILE_NONE; +		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; +		inst->SrcReg[1].Negate = RC_MASK_NONE; +		inst->Opcode = RC_OPCODE_MAD; +		break; +	case RC_OPCODE_CMP: +		tmp = inst->SrcReg[2]; +		inst->SrcReg[2] = inst->SrcReg[0]; +		inst->SrcReg[0] = tmp; +		break; +	case RC_OPCODE_MOV: +		/* AMD say we should use CMP. 
+		 * However, when we transform +		 *  KIL -r0; +		 * into +		 *  CMP tmp, -r0, -r0, 0; +		 *  KIL tmp; +		 * we get incorrect behaviour on R500 when r0 == 0.0. +		 * It appears that the R500 KIL hardware treats -0.0 as less +		 * than zero. +		 */ +		inst->SrcReg[1].File = RC_FILE_NONE; +		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; +		inst->SrcReg[2].File = RC_FILE_NONE; +		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; +		inst->Opcode = RC_OPCODE_MAD; +		break; +	case RC_OPCODE_MUL: +		inst->SrcReg[2].File = RC_FILE_NONE; +		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; +		inst->Opcode = RC_OPCODE_MAD; +		break; +	default: +		/* nothing to do */ +		break; +	} +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, +	int * needrgb, int * needalpha, int * istranscendent) +{ +	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; +	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; +	*istranscendent = 0; + +	if (inst->WriteALUResult == RC_ALURESULT_X) +		*needrgb = 1; +	else if (inst->WriteALUResult == RC_ALURESULT_W) +		*needalpha = 1; + +	switch(inst->Opcode) { +	case RC_OPCODE_ADD: +	case RC_OPCODE_CMP: +	case RC_OPCODE_DDX: +	case RC_OPCODE_DDY: +	case RC_OPCODE_FRC: +	case RC_OPCODE_MAD: +	case RC_OPCODE_MAX: +	case RC_OPCODE_MIN: +	case RC_OPCODE_MOV: +	case RC_OPCODE_MUL: +		break; +	case RC_OPCODE_COS: +	case RC_OPCODE_EX2: +	case RC_OPCODE_LG2: +	case RC_OPCODE_RCP: +	case RC_OPCODE_RSQ: +	case RC_OPCODE_SIN: +		*istranscendent = 1; +		*needalpha = 1; +		break; +	case RC_OPCODE_DP4: +		*needalpha = 1; +		/* fall through */ +	case RC_OPCODE_DP3: +		*needrgb = 1; +		break; +	default: +		break; +	} +} + + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. 
+ */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, +	struct rc_pair_instruction * pair, +	struct rc_sub_instruction * inst) +{ +	memset(pair, 0, sizeof(struct rc_pair_instruction)); + +	int needrgb, needalpha, istranscendent; +	classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + +	if (needrgb) { +		if (istranscendent) +			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; +		else +			pair->RGB.Opcode = inst->Opcode; +		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) +			pair->RGB.Saturate = 1; +	} +	if (needalpha) { +		pair->Alpha.Opcode = inst->Opcode; +		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) +			pair->Alpha.Saturate = 1; +	} + +	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); +	int nargs = opcode->NumSrcRegs; +	int i; + +	/* Special case for DDX/DDY (MDH/MDV). */ +	if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { +		nargs++; +	} + +	for(i = 0; i < opcode->NumSrcRegs; ++i) { +		int source; +		if (needrgb && !istranscendent) { +			unsigned int srcrgb = 0; +			unsigned int srcalpha = 0; +			int j; +			for(j = 0; j < 3; ++j) { +				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); +				if (swz < 3) +					srcrgb = 1; +				else if (swz < 4) +					srcalpha = 1; +			} +			source = rc_pair_alloc_source(pair, srcrgb, srcalpha, +							inst->SrcReg[i].File, inst->SrcReg[i].Index); +			pair->RGB.Arg[i].Source = source; +			pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; +			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; +			pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); +		} +		if (needalpha) { +			unsigned int srcrgb = 0; +			unsigned int srcalpha = 0; +			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 
0 : 3); +			if (swz < 3) +				srcrgb = 1; +			else if (swz < 4) +				srcalpha = 1; +			source = rc_pair_alloc_source(pair, srcrgb, srcalpha, +							inst->SrcReg[i].File, inst->SrcReg[i].Index); +			pair->Alpha.Arg[i].Source = source; +			pair->Alpha.Arg[i].Swizzle = swz; +			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; +			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); +		} +	} + +	/* Destination handling */ +	if (inst->DstReg.File == RC_FILE_OUTPUT) { +		if (inst->DstReg.Index == c->OutputColor) { +			pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; +			pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); +		} else if (inst->DstReg.Index == c->OutputDepth) { +			pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); +		} +	} else { +		if (needrgb) { +			pair->RGB.DestIndex = inst->DstReg.Index; +			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; +		} +		if (needalpha) { +			pair->Alpha.DestIndex = inst->DstReg.Index; +			pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); +		} +	} + +	if (inst->WriteALUResult) { +		pair->WriteALUResult = inst->WriteALUResult; +		pair->ALUResultCompare = inst->ALUResultCompare; +	} +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. 
+ */ +void rc_pair_translate(struct r300_fragment_program_compiler *c) +{ +	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; +	    inst != &c->Base.Program.Instructions; +	    inst = inst->Next) { +		if (inst->Type != RC_INSTRUCTION_NORMAL) +			continue; + +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + +		if (opcode->HasTexture || opcode->IsControlFlow || opcode->Opcode == RC_OPCODE_KIL) +			continue; + +		struct rc_sub_instruction copy = inst->U.I; + +		final_rewrite(&copy); +		inst->Type = RC_INSTRUCTION_PAIR; +		set_pair_instruction(c, &inst->U.P, &copy); +	} +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 68a093b8c0..0dbc5380bb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -138,16 +138,20 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)  	return inst;  } - -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)  { -	struct rc_instruction * inst = rc_alloc_instruction(c); -  	inst->Prev = after;  	inst->Next = after->Next;  	inst->Prev->Next = inst;  	inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +{ +	struct rc_instruction * inst = rc_alloc_instruction(c); + +	rc_insert_instruction(after, inst);  	return inst;  } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 067cb545fd..33db3ea0ff 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -34,6 +34,7 @@  #include "radeon_opcodes.h"  #include "radeon_code.h"  #include "radeon_program_constants.h" +#include 
"radeon_program_pair.h"  struct radeon_compiler; @@ -121,6 +122,7 @@ struct rc_instruction {  	rc_instruction_type Type;  	union {  		struct rc_sub_instruction I; +		struct rc_pair_instruction P;  	} U;  	/** @@ -221,6 +223,7 @@ unsigned int rc_find_free_temporary(struct radeon_compiler * c);  struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);  struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);  void rc_remove_instruction(struct rc_instruction * inst);  unsigned int rc_recompute_ips(struct radeon_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 0d8d8e0b3b..ee839596aa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -1,5 +1,5 @@  /* - * Copyright (C) 2008 Nicolai Haehnle. + * Copyright (C) 2008-2009 Nicolai Haehnle.   *   * All Rights Reserved.   * @@ -25,581 +25,29 @@   *   */ -/** - * @file - * - * Perform temporary register allocation and attempt to pair off instructions - * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction - * vs. ALU instruction scheduling. - */ -  #include "radeon_program_pair.h" -#include <stdio.h> - -#include "memory_pool.h" -#include "radeon_compiler.h" - -#define error(fmt, args...) 
do { \ -	rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n",	\ -		__FILE__, __FUNCTION__, ##args);	\ -} while(0) - -struct pair_state_instruction { -	struct rc_sub_instruction Instruction; -	unsigned int IP; /**< Position of this instruction in original program */ - -	unsigned int IsTex:1; /**< Is a texture instruction */ -	unsigned int NeedRGB:1; /**< Needs the RGB ALU */ -	unsigned int NeedAlpha:1; /**< Needs the Alpha ALU */ -	unsigned int IsTranscendent:1; /**< Is a special transcendent instruction */ - -	/** -	 * Number of (read and write) dependencies that must be resolved before -	 * this instruction can be scheduled. -	 */ -	unsigned int NumDependencies:5; - -	/** -	 * Next instruction in the linked list of ready instructions. -	 */ -	struct pair_state_instruction *NextReady; - -	/** -	 * Values that this instruction writes -	 */ -	struct reg_value *Values[4]; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { -	struct pair_state_instruction *Reader; -	struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * RC_FILE_TEMPORARY. - */ -struct reg_value { -	struct pair_state_instruction *Writer; -	struct reg_value *Next; /**< Pointer to the next value to be written to the same RC_FILE_TEMPORARY component */ - -	/** -	 * Unordered linked list of instructions that read from this value. -	 */ -	struct reg_value_reader *Readers; - -	/** -	 * Number of readers of this value. This is calculated during @ref scan_instructions -	 * and continually decremented during code emission. -	 * When this count reaches zero, the instruction that writes the @ref Next value -	 * can be scheduled. -	 */ -	unsigned int NumReaders; -}; - -/** - * Used to translate a RC_FILE_INPUT or RC_FILE_TEMPORARY Mesa register - * to the proper hardware temporary. 
- */ -struct pair_register_translation { -	unsigned int Allocated:1; -	unsigned int HwIndex:8; -	unsigned int RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ - -	/** -	 * Notes the value that is currently contained in each component -	 * (only used for RC_FILE_TEMPORARY registers). -	 */ -	struct reg_value *Value[4]; -}; - -struct pair_state { -	struct r300_fragment_program_compiler * Compiler; -	const struct radeon_pair_handler *Handler; -	unsigned int Verbose; -	void *UserData; - -	/** -	 * Translate Mesa registers to hardware registers -	 */ -	struct pair_register_translation Inputs[RC_REGISTER_MAX_INDEX]; -	struct pair_register_translation Temps[RC_REGISTER_MAX_INDEX]; - -	struct { -		unsigned int RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ -	} HwTemps[128]; - -	/** -	 * Linked list of instructions that can be scheduled right now, -	 * based on which ALU/TEX resources they require. -	 */ -	struct pair_state_instruction *ReadyFullALU; -	struct pair_state_instruction *ReadyRGB; -	struct pair_state_instruction *ReadyAlpha; -	struct pair_state_instruction *ReadyTEX; -}; - - -static struct pair_register_translation *get_register(struct pair_state *s, rc_register_file file, unsigned int index) -{ -	switch(file) { -	case RC_FILE_TEMPORARY: return &s->Temps[index]; -	case RC_FILE_INPUT: return &s->Inputs[index]; -	default: return 0; -	} -} - -static void alloc_hw_reg(struct pair_state *s, rc_register_file file, unsigned int index, unsigned int hwindex) -{ -	struct pair_register_translation *t = get_register(s, file, index); -	assert(!s->HwTemps[hwindex].RefCount); -	assert(!t->Allocated); -	s->HwTemps[hwindex].RefCount = t->RefCount; -	t->Allocated = 1; -	t->HwIndex = hwindex; -} - -static unsigned int get_hw_reg(struct pair_state *s, rc_register_file file, unsigned int index) -{ -	unsigned int hwindex; - -	struct pair_register_translation *t = get_register(s, file, index); -	if (!t) { -		
error("get_hw_reg: %i[%i]\n", file, index); -		return 0; -	} - -	if (t->Allocated) -		return t->HwIndex; - -	for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) -		if (!s->HwTemps[hwindex].RefCount) -			break; - -	if (hwindex >= s->Handler->MaxHwTemps) { -		error("Ran out of hardware temporaries"); -		return 0; -	} - -	alloc_hw_reg(s, file, index, hwindex); -	return hwindex; -} - - -static void deref_hw_reg(struct pair_state *s, unsigned int hwindex) -{ -	if (!s->HwTemps[hwindex].RefCount) { -		error("Hwindex %i refcount error", hwindex); -		return; -	} - -	s->HwTemps[hwindex].RefCount--; -} - -static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) -{ -	pairinst->NextReady = *list; -	*list = pairinst; -} - -/** - * The given instruction has become ready. Link it into the ready - * instructions. - */ -static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) -{ -	if (s->Verbose) -		fprintf(stderr, "instruction_ready(%i)\n", pairinst->IP); - -	if (pairinst->IsTex) -		add_pairinst_to_list(&s->ReadyTEX, pairinst); -	else if (!pairinst->NeedAlpha) -		add_pairinst_to_list(&s->ReadyRGB, pairinst); -	else if (!pairinst->NeedRGB) -		add_pairinst_to_list(&s->ReadyAlpha, pairinst); -	else -		add_pairinst_to_list(&s->ReadyFullALU, pairinst); -} - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. 
- */ -static void final_rewrite(struct pair_state *s, struct rc_sub_instruction *inst) -{ -	struct rc_src_register tmp; - -	switch(inst->Opcode) { -	case RC_OPCODE_ADD: -		inst->SrcReg[2] = inst->SrcReg[1]; -		inst->SrcReg[1].File = RC_FILE_NONE; -		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; -		inst->SrcReg[1].Negate = RC_MASK_NONE; -		inst->Opcode = RC_OPCODE_MAD; -		break; -	case RC_OPCODE_CMP: -		tmp = inst->SrcReg[2]; -		inst->SrcReg[2] = inst->SrcReg[0]; -		inst->SrcReg[0] = tmp; -		break; -	case RC_OPCODE_MOV: -		/* AMD say we should use CMP. -		 * However, when we transform -		 *  KIL -r0; -		 * into -		 *  CMP tmp, -r0, -r0, 0; -		 *  KIL tmp; -		 * we get incorrect behaviour on R500 when r0 == 0.0. -		 * It appears that the R500 KIL hardware treats -0.0 as less -		 * than zero. -		 */ -		inst->SrcReg[1].File = RC_FILE_NONE; -		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; -		inst->SrcReg[2].File = RC_FILE_NONE; -		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; -		inst->Opcode = RC_OPCODE_MAD; -		break; -	case RC_OPCODE_MUL: -		inst->SrcReg[2].File = RC_FILE_NONE; -		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; -		inst->Opcode = RC_OPCODE_MAD; -		break; -	default: -		/* nothing to do */ -		break; -	} -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct pair_state *s, -	struct pair_state_instruction *psi) -{ -	psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; -	psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & RC_MASK_W) ? 
1 : 0; - -	switch(psi->Instruction.Opcode) { -	case RC_OPCODE_ADD: -	case RC_OPCODE_CMP: -	case RC_OPCODE_DDX: -	case RC_OPCODE_DDY: -	case RC_OPCODE_FRC: -	case RC_OPCODE_MAD: -	case RC_OPCODE_MAX: -	case RC_OPCODE_MIN: -	case RC_OPCODE_MOV: -	case RC_OPCODE_MUL: -		break; -	case RC_OPCODE_COS: -	case RC_OPCODE_EX2: -	case RC_OPCODE_LG2: -	case RC_OPCODE_RCP: -	case RC_OPCODE_RSQ: -	case RC_OPCODE_SIN: -		psi->IsTranscendent = 1; -		psi->NeedAlpha = 1; -		break; -	case RC_OPCODE_DP4: -		psi->NeedAlpha = 1; -		/* fall through */ -	case RC_OPCODE_DP3: -		psi->NeedRGB = 1; -		break; -	case RC_OPCODE_KIL: -	case RC_OPCODE_TEX: -	case RC_OPCODE_TXB: -	case RC_OPCODE_TXP: -		psi->IsTex = 1; -		break; -	default: -		error("Unknown opcode %d\n", psi->Instruction.Opcode); -		break; -	} -} - - -/** - * Count which (input, temporary) register is read and written how often, - * and scan the instruction stream to find dependencies. - */ -static void scan_instructions(struct pair_state *s) -{ -	struct rc_instruction *source; -	unsigned int ip; - -	for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; -	    source != &s->Compiler->Base.Program.Instructions; -	    source = source->Next, ++ip) { -		struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); -		memset(pairinst, 0, sizeof(struct pair_state_instruction)); - -		pairinst->Instruction = source->U.I; -		pairinst->IP = ip; -		final_rewrite(s, &pairinst->Instruction); -		classify_instruction(s, pairinst); - -		const struct rc_opcode_info * opcode = rc_get_opcode_info(pairinst->Instruction.Opcode); -		int j; -		for(j = 0; j < opcode->NumSrcRegs; j++) { -			struct pair_register_translation *t = -				get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); -			if (!t) -				continue; - -			t->RefCount++; - -			if (pairinst->Instruction.SrcReg[j].File == RC_FILE_TEMPORARY) { -				int i; -				for(i = 0; i < 4; ++i) { -					unsigned int 
swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); -					if (swz >= 4) -						continue; /* constant or NIL swizzle */ -					if (!t->Value[swz]) -						continue; /* this is an undefined read */ - -					/* Do not add a dependency if this instruction -					 * also rewrites the value. The code below adds -					 * a dependency for the DstReg, which is a superset -					 * of the SrcReg dependency. */ -					if (pairinst->Instruction.DstReg.File == RC_FILE_TEMPORARY && -					    pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && -					    GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) -						continue; - -					struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); -					pairinst->NumDependencies++; -					t->Value[swz]->NumReaders++; -					r->Reader = pairinst; -					r->Next = t->Value[swz]->Readers; -					t->Value[swz]->Readers = r; -				} -			} -		} - -		if (opcode->HasDstReg) { -			struct pair_register_translation *t = -				get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); -			if (t) { -				t->RefCount++; - -				if (pairinst->Instruction.DstReg.File == RC_FILE_TEMPORARY) { -					int j; -					for(j = 0; j < 4; ++j) { -						if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) -							continue; - -						struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); -						memset(v, 0, sizeof(struct reg_value)); -						v->Writer = pairinst; -						if (t->Value[j]) { -							pairinst->NumDependencies++; -							t->Value[j]->Next = v; -						} -						t->Value[j] = v; -						pairinst->Values[j] = v; -					} -				} -			} -		} - -		if (s->Verbose) -			fprintf(stderr, "scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); - -		if (!pairinst->NumDependencies) -			instruction_ready(s, pairinst); -	} - -	/* Clear the RC_FILE_TEMPORARY state */ -	int i, j; -	for(i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { -		for(j = 0; j < 4; ++j) -			
s->Temps[i].Value[j] = 0; -	} -} - - -static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) -{ -	assert(pairinst->NumDependencies > 0); -	if (!--pairinst->NumDependencies) -		instruction_ready(s, pairinst); -}  /** - * Update the dependency tracking state based on what the instruction - * at the given IP does. + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore.   */ -static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) -{ -	struct rc_sub_instruction *inst = &pairinst->Instruction; - -	if (s->Verbose) -		fprintf(stderr, "commit_instruction(%i)\n", pairinst->IP); - -	if (inst->DstReg.File == RC_FILE_TEMPORARY) { -		struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; -		deref_hw_reg(s, t->HwIndex); - -		int i; -		for(i = 0; i < 4; ++i) { -			if (!GET_BIT(inst->DstReg.WriteMask, i)) -				continue; - -			t->Value[i] = pairinst->Values[i]; -			if (t->Value[i]->NumReaders) { -				struct reg_value_reader *r; -				for(r = pairinst->Values[i]->Readers; r; r = r->Next) -					decrement_dependencies(s, r->Reader); -			} else if (t->Value[i]->Next) { -				/* This happens when the only reader writes -				 * the register at the same time */ -				decrement_dependencies(s, t->Value[i]->Next->Writer); -			} -		} -	} - -	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); -	int i; -	for(i = 0; i < opcode->NumSrcRegs; i++) { -		struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); -		if (!t) -			continue; - -		deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - -		if (inst->SrcReg[i].File != RC_FILE_TEMPORARY) -			continue; - -		int j; -		for(j = 0; j < 4; ++j) { -			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); -			if (swz >= 4) -				continue; -			if (!t->Value[swz]) -				continue; - -			/* Do not free a dependency if this 
instruction -			 * also rewrites the value. See scan_instructions. */ -			if (inst->DstReg.File == RC_FILE_TEMPORARY && -			    inst->DstReg.Index == inst->SrcReg[i].Index && -			    GET_BIT(inst->DstReg.WriteMask, swz)) -				continue; - -			if (!--t->Value[swz]->NumReaders) { -				if (t->Value[swz]->Next) -					decrement_dependencies(s, t->Value[swz]->Next->Writer); -			} -		} -	} -} - - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. - * - * In R500, we don't really know when the result of a texture instruction - * arrives. So allocate all destinations first, to make sure they do not - * arrive early and overwrite a texture coordinate we're going to use later - * in the block. - */ -static void emit_all_tex(struct pair_state *s) -{ -	struct pair_state_instruction *readytex; -	struct pair_state_instruction *pairinst; - -	assert(s->ReadyTEX); - -	// Don't let the ready list change under us! -	readytex = s->ReadyTEX; -	s->ReadyTEX = 0; - -	// Allocate destination hardware registers in one block to avoid conflicts. 
-	for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { -		struct rc_sub_instruction *inst = &pairinst->Instruction; -		if (inst->Opcode != RC_OPCODE_KIL) -			get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); -	} - -	if (s->Compiler->Base.Debug) -		fprintf(stderr, " BEGIN_TEX\n"); - -	if (s->Handler->BeginTexBlock) -		s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); - -	for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { -		struct rc_sub_instruction *inst = &pairinst->Instruction; -		commit_instruction(s, pairinst); - -		if (inst->Opcode != RC_OPCODE_KIL) -			inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); -		inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - -		if (s->Compiler->Base.Debug) { -			/* Should print the TEX instruction here */ -		} - -		struct radeon_pair_texture_instruction rpti; - -		switch(inst->Opcode) { -		case RC_OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; -		case RC_OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; -		case RC_OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; -		default: -		case RC_OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; -		} - -		rpti.DestIndex = inst->DstReg.Index; -		rpti.WriteMask = inst->DstReg.WriteMask; -		rpti.TexSrcUnit = inst->TexSrcUnit; -		rpti.TexSrcTarget = inst->TexSrcTarget; -		rpti.SrcIndex = inst->SrcReg[0].Index; -		rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; - -		s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); -	} - -	if (s->Compiler->Base.Debug) -		fprintf(stderr, " END_TEX\n"); -} - - -static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, -	struct rc_src_register src, unsigned int rgb, unsigned int alpha) +int rc_pair_alloc_source(struct rc_pair_instruction *pair, +	unsigned int rgb, unsigned int alpha, +	rc_register_file file, unsigned int index)  {  	int 
candidate = -1;  	int candidate_quality = -1;  	int i; -	if (!rgb && !alpha) +	if ((!rgb && !alpha) || file == RC_FILE_NONE)  		return 0; -	unsigned int constant; -	unsigned int index; - -	if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { -		constant = 0; -		index = get_hw_reg(s, src.File, src.Index); -	} else { -		constant = 1; -		index = src.Index; -	} -  	for(i = 0; i < 3; ++i) {  		int q = 0;  		if (rgb) {  			if (pair->RGB.Src[i].Used) { -				if (pair->RGB.Src[i].Constant != constant || +				if (pair->RGB.Src[i].File != file ||  				    pair->RGB.Src[i].Index != index)  					continue;  				q++; @@ -607,7 +55,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio  		}  		if (alpha) {  			if (pair->Alpha.Src[i].Used) { -				if (pair->Alpha.Src[i].Constant != constant || +				if (pair->Alpha.Src[i].File != file ||  				    pair->Alpha.Src[i].Index != index)  					continue;  				q++; @@ -622,330 +70,15 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio  	if (candidate >= 0) {  		if (rgb) {  			pair->RGB.Src[candidate].Used = 1; -			pair->RGB.Src[candidate].Constant = constant; +			pair->RGB.Src[candidate].File = file;  			pair->RGB.Src[candidate].Index = index;  		}  		if (alpha) {  			pair->Alpha.Src[candidate].Used = 1; -			pair->Alpha.Src[candidate].Constant = constant; +			pair->Alpha.Src[candidate].File = file;  			pair->Alpha.Src[candidate].Index = index;  		}  	}  	return candidate;  } - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * if possible. 
- */ -static int fill_instruction_into_pair( -	struct pair_state *s, -	struct radeon_pair_instruction *pair, -	struct pair_state_instruction *pairinst) -{ -	struct rc_sub_instruction *inst = &pairinst->Instruction; - -	assert(!pairinst->NeedRGB || pair->RGB.Opcode == RC_OPCODE_NOP); -	assert(!pairinst->NeedAlpha || pair->Alpha.Opcode == RC_OPCODE_NOP); - -	if (pairinst->NeedRGB) { -		if (pairinst->IsTranscendent) -			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; -		else -			pair->RGB.Opcode = inst->Opcode; -		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) -			pair->RGB.Saturate = 1; -	} -	if (pairinst->NeedAlpha) { -		pair->Alpha.Opcode = inst->Opcode; -		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) -			pair->Alpha.Saturate = 1; -	} - -	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); -	int nargs = opcode->NumSrcRegs; -	int i; - -	/* Special case for DDX/DDY (MDH/MDV). */ -	if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { -		if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) -			return 0; -		else -			nargs++; -	} - -	for(i = 0; i < nargs; ++i) { -		int source; -		if (pairinst->NeedRGB && !pairinst->IsTranscendent) { -			unsigned int srcrgb = 0; -			unsigned int srcalpha = 0; -			int j; -			for(j = 0; j < 3; ++j) { -				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); -				if (swz < 3) -					srcrgb = 1; -				else if (swz < 4) -					srcalpha = 1; -			} -			source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); -			if (source < 0) -				return 0; -			pair->RGB.Arg[i].Source = source; -			pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; -			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; -			pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); -		} -		if (pairinst->NeedAlpha) { -			unsigned int srcrgb = 0; -			unsigned int srcalpha = 0; -			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 
0 : 3); -			if (swz < 3) -				srcrgb = 1; -			else if (swz < 4) -				srcalpha = 1; -			source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); -			if (source < 0) -				return 0; -			pair->Alpha.Arg[i].Source = source; -			pair->Alpha.Arg[i].Swizzle = swz; -			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; -			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); -		} -	} - -	return 1; -} - - -/** - * Fill in the destination register information. - * - * This is split from filling in source registers because we want - * to avoid allocating hardware temporaries for destinations until - * we are absolutely certain that we're going to emit a certain - * instruction pairing. - */ -static void fill_dest_into_pair( -	struct pair_state *s, -	struct radeon_pair_instruction *pair, -	struct pair_state_instruction *pairinst) -{ -	struct rc_sub_instruction *inst = &pairinst->Instruction; - -	if (inst->DstReg.File == RC_FILE_OUTPUT) { -		if (inst->DstReg.Index == s->Compiler->OutputColor) { -			pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; -			pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); -		} else if (inst->DstReg.Index == s->Compiler->OutputDepth) { -			pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); -		} -	} else { -		unsigned int hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); -		if (pairinst->NeedRGB) { -			pair->RGB.DestIndex = hwindex; -			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; -		} -		if (pairinst->NeedAlpha) { -			pair->Alpha.DestIndex = hwindex; -			pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); -		} -	} -} - - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. 
- */ -static void emit_alu(struct pair_state *s) -{ -	struct radeon_pair_instruction pair; -	struct pair_state_instruction *psi; - -	if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { -		if (s->ReadyFullALU) { -			psi = s->ReadyFullALU; -			s->ReadyFullALU = s->ReadyFullALU->NextReady; -		} else if (s->ReadyRGB) { -			psi = s->ReadyRGB; -			s->ReadyRGB = s->ReadyRGB->NextReady; -		} else { -			psi = s->ReadyAlpha; -			s->ReadyAlpha = s->ReadyAlpha->NextReady; -		} - -		memset(&pair, 0, sizeof(pair)); -		fill_instruction_into_pair(s, &pair, psi); -		fill_dest_into_pair(s, &pair, psi); -		commit_instruction(s, psi); -	} else { -		struct pair_state_instruction **prgb; -		struct pair_state_instruction **palpha; - -		/* Some pairings might fail because they require too -		 * many source slots; try all possible pairings if necessary */ -		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { -			for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { -				struct pair_state_instruction * psirgb = *prgb; -				struct pair_state_instruction * psialpha = *palpha; -				memset(&pair, 0, sizeof(pair)); -				fill_instruction_into_pair(s, &pair, psirgb); -				if (!fill_instruction_into_pair(s, &pair, psialpha)) -					continue; -				*prgb = (*prgb)->NextReady; -				*palpha = (*palpha)->NextReady; -				fill_dest_into_pair(s, &pair, psirgb); -				fill_dest_into_pair(s, &pair, psialpha); -				commit_instruction(s, psirgb); -				commit_instruction(s, psialpha); -				goto success; -			} -		} - -		/* No success in pairing; just take the first RGB instruction */ -		psi = s->ReadyRGB; -		s->ReadyRGB = s->ReadyRGB->NextReady; - -		memset(&pair, 0, sizeof(pair)); -		fill_instruction_into_pair(s, &pair, psi); -		fill_dest_into_pair(s, &pair, psi); -		commit_instruction(s, psi); -	success: ; -	} - -	if (s->Compiler->Base.Debug) -		radeonPrintPairInstruction(&pair); - -	s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, 
&pair); -} - -/* Callback function for assigning input registers to hardware registers */ -static void alloc_helper(void * data, unsigned input, unsigned hwreg) -{ -	struct pair_state * s = data; -	alloc_hw_reg(s, RC_FILE_INPUT, input, hwreg); -} - -void radeonPairProgram( -	struct r300_fragment_program_compiler * compiler, -	const struct radeon_pair_handler* handler, void *userdata) -{ -	struct pair_state s; - -	memset(&s, 0, sizeof(s)); -	s.Compiler = compiler; -	s.Handler = handler; -	s.UserData = userdata; -	s.Verbose = 0 && s.Compiler->Base.Debug; - -	if (s.Compiler->Base.Debug) -		fprintf(stderr, "Emit paired program\n"); - -	scan_instructions(&s); -	s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); - -	while(!s.Compiler->Base.Error && -	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { -		if (s.ReadyTEX) -			emit_all_tex(&s); - -		while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) -			emit_alu(&s); -	} - -	if (s.Compiler->Base.Debug) -		fprintf(stderr, " END\n"); -} - - -static void print_pair_src(int i, struct radeon_pair_instruction_source* src) -{ -	fprintf(stderr, "  Src%i = %s[%i]", i, src->Constant ? 
"CNST" : "TEMP", src->Index); -} - -static const char* opcode_string(rc_opcode opcode) -{ -	return rc_get_opcode_info(opcode)->Name; -} - -static int num_pairinst_args(rc_opcode opcode) -{ -	return rc_get_opcode_info(opcode)->NumSrcRegs; -} - -static char swizzle_char(rc_swizzle swz) -{ -	switch(swz) { -	case RC_SWIZZLE_X: return 'x'; -	case RC_SWIZZLE_Y: return 'y'; -	case RC_SWIZZLE_Z: return 'z'; -	case RC_SWIZZLE_W: return 'w'; -	case RC_SWIZZLE_ZERO: return '0'; -	case RC_SWIZZLE_ONE: return '1'; -	case RC_SWIZZLE_HALF: return 'H'; -	case RC_SWIZZLE_UNUSED: return '_'; -	default: return '?'; -	} -} - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) -{ -	int nargs; -	int i; - -	fprintf(stderr, "       RGB:  "); -	for(i = 0; i < 3; ++i) { -		if (inst->RGB.Src[i].Used) -			print_pair_src(i, inst->RGB.Src + i); -	} -	fprintf(stderr, "\n"); -	fprintf(stderr, "       Alpha:"); -	for(i = 0; i < 3; ++i) { -		if (inst->Alpha.Src[i].Used) -			print_pair_src(i, inst->Alpha.Src + i); -	} -	fprintf(stderr, "\n"); - -	fprintf(stderr, "  %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); -	if (inst->RGB.WriteMask) -		fprintf(stderr, " TEMP[%i].%s%s%s", inst->RGB.DestIndex, -			(inst->RGB.WriteMask & 1) ? "x" : "", -			(inst->RGB.WriteMask & 2) ? "y" : "", -			(inst->RGB.WriteMask & 4) ? "z" : ""); -	if (inst->RGB.OutputWriteMask) -		fprintf(stderr, " COLOR.%s%s%s", -			(inst->RGB.OutputWriteMask & 1) ? "x" : "", -			(inst->RGB.OutputWriteMask & 2) ? "y" : "", -			(inst->RGB.OutputWriteMask & 4) ? "z" : ""); -	nargs = num_pairinst_args(inst->RGB.Opcode); -	for(i = 0; i < nargs; ++i) { -		const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; -		const char* neg = inst->RGB.Arg[i].Negate ? 
"-" : ""; -		fprintf(stderr, ", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, -			swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), -			swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), -			swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), -			abs); -	} -	fprintf(stderr, "\n"); - -	fprintf(stderr, "  %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); -	if (inst->Alpha.WriteMask) -		fprintf(stderr, " TEMP[%i].w", inst->Alpha.DestIndex); -	if (inst->Alpha.OutputWriteMask) -		fprintf(stderr, " COLOR.w"); -	if (inst->Alpha.DepthWriteMask) -		fprintf(stderr, " DEPTH.w"); -	nargs = num_pairinst_args(inst->Alpha.Opcode); -	for(i = 0; i < nargs; ++i) { -		const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; -		const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; -		fprintf(stderr, ", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, -			swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); -	} -	fprintf(stderr, "\n"); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 440069d558..1600598428 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -28,22 +28,37 @@  #ifndef __RADEON_PROGRAM_PAIR_H_  #define __RADEON_PROGRAM_PAIR_H_ +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" +  struct r300_fragment_program_compiler;  /** - * Represents a paired instruction, as found in R300 and R500 + * \file + * Represents a paired ALU instruction, as found in R300 and R500   * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. 
+ * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate   */ + +  struct radeon_pair_instruction_source { -	unsigned int Index:8; -	unsigned int Constant:1;  	unsigned int Used:1; +	rc_register_file File:3; +	unsigned int Index:RC_REGISTER_INDEX_BITS;  };  struct radeon_pair_instruction_rgb { -	unsigned int Opcode:8; -	unsigned int DestIndex:8; +	rc_opcode Opcode:8; +	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;  	unsigned int WriteMask:3;  	unsigned int OutputWriteMask:3;  	unsigned int Saturate:1; @@ -59,8 +74,8 @@ struct radeon_pair_instruction_rgb {  };  struct radeon_pair_instruction_alpha { -	unsigned int Opcode:8; -	unsigned int DestIndex:8; +	rc_opcode Opcode:8; +	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;  	unsigned int WriteMask:1;  	unsigned int OutputWriteMask:1;  	unsigned int DepthWriteMask:1; @@ -76,66 +91,34 @@ struct radeon_pair_instruction_alpha {  	} Arg[3];  }; -struct radeon_pair_instruction { +struct rc_pair_instruction {  	struct radeon_pair_instruction_rgb RGB;  	struct radeon_pair_instruction_alpha Alpha; -}; - - -enum { -	RADEON_OPCODE_TEX = 0, -	RADEON_OPCODE_TXB, -	RADEON_OPCODE_TXP, -	RADEON_OPCODE_KIL -}; -struct radeon_pair_texture_instruction { -	unsigned int Opcode:2; /**< one of RADEON_OPCODE_xxx */ - -	unsigned int DestIndex:8; -	unsigned int WriteMask:4; - -	unsigned int TexSrcUnit:5; -	unsigned int TexSrcTarget:3; - -	unsigned int SrcIndex:8; -	unsigned int SrcSwizzle:12; +	rc_write_aluresult WriteALUResult:2; +	rc_compare_func ALUResultCompare:3;  };  /** - * + * General helper functions for dealing with the paired instruction format.   */ -struct radeon_pair_handler { -	/** -	 * Write a paired instruction to the hardware. -	 * -	 * @return 0 on error. -	 */ -	int (*EmitPaired)(void*, struct radeon_pair_instruction*); - -	/** -	 * Write a texture instruction to the hardware. 
-	 * Register indices have already been rewritten to the allocated -	 * hardware register numbers. -	 * -	 * @return 0 on error. -	 */ -	int (*EmitTex)(void*, struct radeon_pair_texture_instruction*); +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, +	unsigned int rgb, unsigned int alpha, +	rc_register_file file, unsigned int index); +/*@}*/ -	/** -	 * Called before a block of contiguous, independent texture -	 * instructions is emitted. -	 */ -	int (*BeginTexBlock)(void*); -	unsigned MaxHwTemps; -}; - -void radeonPairProgram( -	struct r300_fragment_program_compiler * compiler, -	const struct radeon_pair_handler*, void *userdata); +/** + * Compiler passes that operate with the paired format. + */ +/*@{*/ +struct radeon_pair_handler; -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); +void rc_pair_translate(struct r300_fragment_program_compiler *c); +void rc_pair_schedule(struct r300_fragment_program_compiler *c); +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps); +/*@}*/  #endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index fe90a5900e..d863b82d53 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -99,6 +99,21 @@ static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)  	}  } +static char rc_swizzle_char(unsigned int swz) +{ +	switch(swz) { +	case RC_SWIZZLE_X: return 'x'; +	case RC_SWIZZLE_Y: return 'y'; +	case RC_SWIZZLE_Z: return 'z'; +	case RC_SWIZZLE_W: return 'w'; +	case RC_SWIZZLE_ZERO: return '0'; +	case RC_SWIZZLE_ONE: return '1'; +	case RC_SWIZZLE_HALF: return 'H'; +	case RC_SWIZZLE_UNUSED: return '_'; +	} +	return '?'; +} +  static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)  {  	unsigned int comp; @@ -106,16 +121,7 @@ static void 
rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate  		rc_swizzle swz = GET_SWZ(swizzle, comp);  		if (GET_BIT(negate, comp))  			fprintf(f, "-"); -		switch(swz) { -		case RC_SWIZZLE_X: fprintf(f, "x"); break; -		case RC_SWIZZLE_Y: fprintf(f, "y"); break; -		case RC_SWIZZLE_Z: fprintf(f, "z"); break; -		case RC_SWIZZLE_W: fprintf(f, "w"); break; -		case RC_SWIZZLE_ZERO: fprintf(f, "0"); break; -		case RC_SWIZZLE_ONE: fprintf(f, "1"); break; -		case RC_SWIZZLE_HALF: fprintf(f, "H"); break; -		case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break; -		} +		fprintf(f, "%c", rc_swizzle_char(swz));  	}  } @@ -142,7 +148,7 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src)  		fprintf(f, "|");  } -static void rc_print_instruction(FILE * f, struct rc_instruction * inst) +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst)  {  	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);  	unsigned int reg; @@ -190,6 +196,87 @@ static void rc_print_instruction(FILE * f, struct rc_instruction * inst)  	fprintf(f, "\n");  } +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst) +{ +	struct rc_pair_instruction * inst = &fullinst->U.P; +	int printedsrc = 0; + +	for(unsigned int src = 0; src < 3; ++src) { +		if (inst->RGB.Src[src].Used) { +			if (printedsrc) +				fprintf(f, ", "); +			fprintf(f, "src%i.xyz = ", src); +			rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); +			printedsrc = 1; +		} +		if (inst->Alpha.Src[src].Used) { +			if (printedsrc) +				fprintf(f, ", "); +			fprintf(f, "src%i.w = ", src); +			rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); +			printedsrc = 1; +		} +	} +	fprintf(f, "\n"); + +	if (inst->RGB.Opcode != RC_OPCODE_NOP) { +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + +		fprintf(f, "     %s%s", opcode->Name, inst->RGB.Saturate ? 
"_SAT" : ""); +		if (inst->RGB.WriteMask) +			fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, +				(inst->RGB.WriteMask & 1) ? "x" : "", +				(inst->RGB.WriteMask & 2) ? "y" : "", +				(inst->RGB.WriteMask & 4) ? "z" : ""); +		if (inst->RGB.OutputWriteMask) +			fprintf(f, " color.%s%s%s", +				(inst->RGB.OutputWriteMask & 1) ? "x" : "", +				(inst->RGB.OutputWriteMask & 2) ? "y" : "", +				(inst->RGB.OutputWriteMask & 4) ? "z" : ""); +		if (inst->WriteALUResult == RC_ALURESULT_X) +			fprintf(f, " aluresult"); + +		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { +			const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; +			const char* neg = inst->RGB.Arg[arg].Negate ? "-" : ""; +			fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source, +				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), +				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), +				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), +				abs); +		} +		fprintf(f, "\n"); +	} + +	if (inst->Alpha.Opcode != RC_OPCODE_NOP) { +		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + +		fprintf(f, "     %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); +		if (inst->Alpha.WriteMask) +			fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); +		if (inst->Alpha.OutputWriteMask) +			fprintf(f, " color.w"); +		if (inst->Alpha.DepthWriteMask) +			fprintf(f, " depth.w"); +		if (inst->WriteALUResult == RC_ALURESULT_W) +			fprintf(f, " aluresult"); + +		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { +			const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; +			const char* neg = inst->Alpha.Arg[arg].Negate ? 
"-" : ""; +			fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source, +				rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); +		} +		fprintf(f, "\n"); +	} + +	if (inst->WriteALUResult) { +		fprintf(f, "      [aluresult = ("); +		rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); +		fprintf(f, ")]\n"); +	} +} +  /**   * Print program to stderr, default options.   */ @@ -203,7 +290,10 @@ void rc_print_program(const struct rc_program *prog)  	for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {  		fprintf(stderr, "%3d: ", linenum); -		rc_print_instruction(stderr, inst); +		if (inst->Type == RC_INSTRUCTION_PAIR) +			rc_print_pair_instruction(stderr, inst); +		else +			rc_print_normal_instruction(stderr, inst);  		linenum++;  	} | 
