diff options
| author | Christian König <deathsimple@vodafone.de> | 2010-12-22 17:45:51 +0100 | 
|---|---|---|
| committer | Christian König <deathsimple@vodafone.de> | 2011-01-13 23:01:35 +0100 | 
| commit | 96f8f8db7bcddec7ef0fce62cf0e23f1c2fb8c8d (patch) | |
| tree | 1ad79a2825ea574152bcae5806b99067918f8bbc /src/gallium/drivers | |
| parent | d7342f6a81a0d13acb6486a24bffa8e5987d5410 (diff) | |
r600g: rework literal handling
Diffstat (limited to 'src/gallium/drivers')
| -rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 189 | ||||
| -rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 6 | ||||
| -rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 221 | ||||
| -rw-r--r-- | src/gallium/drivers/r600/r600_shader.h | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/r600/r700_asm.c | 10 | 
5 files changed, 151 insertions, 277 deletions
| diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index ca2bf93b0b..e96236e06e 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -608,10 +608,90 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a  	return 0;  } +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) +{ +	switch(value) { +	case 0: +		*sel = V_SQ_ALU_SRC_0; +		break; +	case 1: +		*sel = V_SQ_ALU_SRC_1_INT; +		break; +	case -1: +		*sel = V_SQ_ALU_SRC_M_1_INT; +		break; +	case 0x3F800000: // 1.0f +		*sel = V_SQ_ALU_SRC_1; +		break; +	case 0x3F000000: // 0.5f +		*sel = V_SQ_ALU_SRC_0_5; +		break; +	case 0xBF800000: // -1.0f +		*sel = V_SQ_ALU_SRC_1; +		*neg ^= 1; +		break; +	case 0xBF000000: // -0.5f +		*sel = V_SQ_ALU_SRC_0_5; +		*neg ^= 1; +		break; +	default: +		*sel = V_SQ_ALU_SRC_LITERAL; +		break; +	} +} + +/* compute how many literal are needed */ +static int r600_bc_alu_nliterals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned *nliteral) +{ +	unsigned num_src = r600_bc_get_num_operands(alu); +	unsigned i, j; + +	for (i = 0; i < num_src; ++i) { +		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { +			uint32_t value = alu->src[i].value[alu->src[i].chan]; +			unsigned found = 0; +			for (j = 0; j < *nliteral; ++j) { +				if (literal[j] == value) { +					found = 1; +					break; +				} +			} +			if (!found) { +				if (*nliteral >= 4) +					return -EINVAL; +				literal[(*nliteral)++] = value; +			} +		} +	} +	return 0; +} + +static void r600_bc_alu_adjust_literals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned nliteral) +{ +	unsigned num_src = r600_bc_get_num_operands(alu); +	unsigned i, j; + +	for (i = 0; i < num_src; ++i) { +		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { +			uint32_t value = alu->src[i].value[alu->src[i].chan]; +			for (j = 0; j < nliteral; ++j) { +				if (literal[j] == value) { +					alu->src[i].chan = j; +					break; +				} +			} +		} +	} +} +  static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)  {  	struct r600_bc_alu *prev[5];  	struct r600_bc_alu *result[5] = { NULL }; +	 +	uint32_t literal[4]; +	unsigned nliteral = 0; +  	int i, j, r, src, num_src;  	int num_once_inst = 0; @@ -620,13 +700,12 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s  		return r;  	for (i = 0; i < 5; ++i) { -		// TODO: we have literals? forget it! -		if (prev[i] && prev[i]->nliteral) +		/* check number of literals */ +		if (prev[i] && r600_bc_alu_nliterals(prev[i], literal, &nliteral))  			return 0; -		if (slots[i] && slots[i]->nliteral) +		if (slots[i] && r600_bc_alu_nliterals(slots[i], literal, &nliteral))  			return 0; -  		// let's check used slots  		if (prev[i] && !slots[i]) {  			result[i] = prev[i]; @@ -834,7 +913,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int  	if (nalu == NULL)  		return -ENOMEM;  	memcpy(nalu, alu, sizeof(struct r600_bc_alu)); -	nalu->nliteral = 0;  	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {  		/* check if we could add it anyway */ @@ -880,20 +958,10 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int  		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {  			bc->ngpr = nalu->src[i].sel + 1;  		} -		/* compute how many literal are needed -		 * either 2 or 4 literals -		 */ -		if (nalu->src[i].sel == 253) { -			if (((nalu->src[i].chan + 2) & 0x6) > nalu->nliteral) { -				nalu->nliteral = (nalu->src[i].chan + 2) & 0x6; -			} -		} -	} -	if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { -		lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); -		if (!lalu->last && lalu->nliteral > nalu->nliteral) { -			nalu->nliteral = lalu->nliteral; -		} +		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) +			r600_bc_special_constants( +				nalu->src[i].value[nalu->src[i].chan],  +				&nalu->src[i].sel, &nalu->src[i].neg);  	}  	if (nalu->dst.sel >= bc->ngpr) {  		bc->ngpr = nalu->dst.sel + 1; @@ -938,46 +1006,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)  	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));  } -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) -{ -	struct r600_bc_alu *alu; - -	if (bc->cf_last == NULL) { -		return 0; -	} -	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { -		return 0; -	} -	/* all same on EG */ -	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || -	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || -	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || -	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK || -	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE || -	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END || -	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { -		return 0; -	} -	/* same on EG */ -	if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && -	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) && -	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) && -	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || -		LIST_IS_EMPTY(&bc->cf_last->alu)) { -		R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); -		return -EINVAL; -	} -	alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); -	if (!alu->last || !alu->nliteral || alu->literal_added) { -		return 0; -	} -	memcpy(alu->value, value, 4 * 4); -	bc->cf_last->ndw += alu->nliteral; -	bc->ndw += alu->nliteral; -	alu->literal_added = 1; -	return 0; -} -  int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)  {  	struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -1134,8 +1162,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign  /* r600 only, r700/eg bits in r700_asm.c */  static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)  { -	unsigned i; -  	/* don't replace gpr by pv or ps for destination register */  	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |  				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | @@ -1172,14 +1198,6 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign  					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |  					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);  	} -	if (alu->last) { -		if (alu->nliteral && !alu->literal_added) { -			R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); -		} -		for (i = 0; i < alu->nliteral; i++) { -			bc->bytecode[id++] = alu->value[i]; -		} -	}  	return 0;  } @@ -1257,8 +1275,10 @@ int r600_bc_build(struct r600_bc *bc)  	struct r600_bc_alu *alu;  	struct r600_bc_vtx *vtx;  	struct r600_bc_tex *tex; +	uint32_t literal[4]; +	unsigned nliteral;  	unsigned addr; -	int r; +	int i, r;  	if (bc->callstack[0].max > 0)  		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; @@ -1275,6 +1295,16 @@ int r600_bc_build(struct r600_bc *bc)  		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):  		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):  		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): +			nliteral = 0; +			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { +				r = r600_bc_alu_nliterals(alu, literal, &nliteral); +				if (r) +					return r; +				if (alu->last) { +					cf->ndw += align(nliteral, 2); +					nliteral = 0; +				} +			}  			break;  		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:  		case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -1323,7 +1353,12 @@ int r600_bc_build(struct r600_bc *bc)  		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):  		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):  		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): +			nliteral = 0;  			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { +				r = r600_bc_alu_nliterals(alu, literal, &nliteral); +				if (r) +					return r; +				r600_bc_alu_adjust_literals(alu, literal, nliteral);  				switch(bc->chiprev) {  				case CHIPREV_R600:  					r = r600_bc_alu_build(bc, alu, addr); @@ -1340,7 +1375,10 @@ int r600_bc_build(struct r600_bc *bc)  					return r;  				addr += 2;  				if (alu->last) { -					addr += alu->nliteral; +					for (i = 0; i < align(nliteral, 2); ++i) { +						bc->bytecode[addr++] = literal[i]; +					} +					nliteral = 0;  				}  			}  			break; @@ -1427,6 +1465,8 @@ void r600_bc_dump(struct r600_bc *bc)  	struct r600_bc_tex *tex = NULL;  	unsigned i, id; +	uint32_t literal[4]; +	unsigned nliteral;  	char chip = '6';  	switch (bc->chiprev) { @@ -1513,7 +1553,10 @@ void r600_bc_dump(struct r600_bc *bc)  		}  		id = cf->addr; +		nliteral = 0;  		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { +			r600_bc_alu_nliterals(alu, literal, &nliteral); +  			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);  			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);  			fprintf(stderr, "REL:%d ", alu->src[0].rel); @@ -1548,10 +1591,12 @@ void r600_bc_dump(struct r600_bc *bc)  			id++;  			if (alu->last) { -				for (i = 0; i < alu->nliteral; i++, id++) { +				for (i = 0; i < nliteral; i++, id++) {  					float *f = (float*)(bc->bytecode + id);  					fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);  				} +				id += nliteral & 1; +				nliteral = 0;  			}  		} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 570292e9fd..278b4466cb 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -34,6 +34,7 @@ struct r600_bc_alu_src {  	unsigned			neg;  	unsigned			abs;  	unsigned			rel; +	u32				*value;  };  struct r600_bc_alu_dst { @@ -52,11 +53,8 @@ struct r600_bc_alu {  	unsigned			last;  	unsigned			is_op3;  	unsigned			predicate; -	unsigned			nliteral; -	unsigned			literal_added;  	unsigned			bank_swizzle;  	unsigned			bank_swizzle_force; -	u32				value[4];  	unsigned			omod;  }; @@ -196,13 +194,13 @@ void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);  int r600_bc_init(struct r600_bc *bc, enum radeon_family family);  void r600_bc_clear(struct r600_bc *bc);  int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);  int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);  int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);  int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);  int r600_bc_build(struct r600_bc *bc);  int r600_bc_add_cfinst(struct r600_bc *bc, int inst);  int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);  void r600_bc_dump(struct r600_bc *bc);  void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);  void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 78739bf89d..e85e829bad 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -225,11 +225,12 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)  	return 0;  } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);  int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)  {  	static int dump_shaders = -1;  	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; +	u32 *literals;  	int r;          /* Would like some magic "get_bool_option_once" routine. @@ -242,12 +243,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s  		tgsi_dump(tokens, 0);  	}  	shader->shader.family = r600_get_family(rctx->radeon); -	r = r600_shader_from_tgsi(tokens, &shader->shader); +	r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);  	if (r) {  		R600_ERR("translation from TGSI failed !\n");  		return r;  	}  	r = r600_bc_build(&shader->shader.bc); +	free(literals);  	if (r) {  		R600_ERR("building bytecode failed !\n");  		return r; @@ -282,7 +284,6 @@ struct r600_shader_ctx {  	struct r600_shader_tgsi_instruction	*inst_info;  	struct r600_bc				*bc;  	struct r600_shader			*shader; -	u32					value[4];  	u32					*literals;  	u32					nliterals;  	u32					max_driver_temp_used; @@ -491,7 +492,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)  	return ctx->num_interp_gpr;  } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)  {  	struct tgsi_full_immediate *immediate;  	struct r600_shader_ctx ctx; @@ -599,9 +600,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s  			r = ctx.inst_info->process(&ctx);  			if (r)  				goto out_err; -			r = r600_bc_add_literal(ctx.bc, ctx.value); -			if (r) -				goto out_err;  			break;  		case TGSI_TOKEN_TYPE_PROPERTY:  			break; @@ -722,7 +720,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s  		if (r)  			goto out_err;  	} -	free(ctx.literals); +	*literals = ctx.literals;  	tgsi_parse_free(&ctx.parse);  	return 0;  out_err: @@ -756,38 +754,13 @@ static int tgsi_src(struct r600_shader_ctx *ctx,  			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {  			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; -			switch(ctx->literals[index]) { -			case 0: -				r600_src->sel = V_SQ_ALU_SRC_0; -				return 0; -			case 1: -				r600_src->sel = V_SQ_ALU_SRC_1_INT; -				return 0; -			case -1: -				r600_src->sel = V_SQ_ALU_SRC_M_1_INT; -				return 0; -			case 0x3F800000: // 1.0f -				r600_src->sel = V_SQ_ALU_SRC_1; +			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); +			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)  				return 0; -			case 0x3F000000: // 0.5f -				r600_src->sel = V_SQ_ALU_SRC_0_5; -				return 0; -			case 0xBF800000: // -1.0f -				r600_src->sel = V_SQ_ALU_SRC_1; -				r600_src->neg ^= 1; -				return 0; -			case 0xBF000000: // -0.5f -				r600_src->sel = V_SQ_ALU_SRC_0_5; -				r600_src->neg ^= 1; -				return 0; -			}  		}  		index = tgsi_src->Register.Index;  		r600_src->sel = V_SQ_ALU_SRC_LITERAL; -		ctx->value[0] = ctx->literals[index * 4 + 0]; -		ctx->value[1] = ctx->literals[index * 4 + 1]; -		ctx->value[2] = ctx->literals[index * 4 + 2]; -		ctx->value[3] = ctx->literals[index * 4 + 3]; +		r600_src->value = ctx->literals + index * 4;  	} else {  		if (tgsi_src->Register.Indirect)  			r600_src->rel = V_SQ_REL_RELATIVE; @@ -893,6 +866,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_  				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);  				alu.src[0].sel = r600_src[i].sel;  				alu.src[0].chan = k; +				alu.src[0].value = r600_src[i].value;  				alu.dst.sel = treg;  				alu.dst.chan = k;  				alu.dst.write = 1; @@ -902,9 +876,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_  				if (r)  					return r;  			} -			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); -			if (r) -				return r;  			r600_src[i].sel = treg;  			j--;  		} @@ -999,12 +970,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)  static int tgsi_setup_trig(struct r600_shader_ctx *ctx,  			   struct r600_bc_alu_src r600_src[3])  { +	static float half_inv_pi = 1.0 /(3.1415926535 * 2); +	static float double_pi = 3.1415926535 * 2; +	static float neg_pi = -3.1415926535; +  	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; -	int r, src0_chan; -	uint32_t lit_vals[4]; +	int r;  	struct r600_bc_alu alu; -	memset(lit_vals, 0, 4*4);  	r = tgsi_split_constant(ctx, r600_src);  	if (r)  		return r; @@ -1012,22 +985,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,  	if (r)  		return r; -	src0_chan = tgsi_chan(&inst->Src[0], 0); - -	/* We are going to feed two literals to the MAD below, -	 * which means that if the first operand is a literal as well, -	 * we need to copy its value manually. -	 */ -	if (r600_src[0].sel == V_SQ_ALU_SRC_LITERAL) { -		unsigned index = inst->Src[0].Register.Index; - -		lit_vals[2] = ctx->literals[index * 4 + src0_chan]; -		src0_chan = 2; -	} - -	lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); -	lit_vals[1] = fui(0.5f); -  	memset(&alu, 0, sizeof(struct r600_bc_alu));  	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);  	alu.is_op3 = 1; @@ -1037,19 +994,17 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,  	alu.dst.write = 1;  	alu.src[0] = r600_src[0]; -	alu.src[0].chan = src0_chan; +	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);  	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;  	alu.src[1].chan = 0; -	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; +	alu.src[1].value = (uint32_t *)&half_inv_pi; +	alu.src[2].sel = V_SQ_ALU_SRC_0_5;  	alu.src[2].chan = 1;  	alu.last = 1;  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc, lit_vals); -	if (r) -		return r;  	memset(&alu, 0, sizeof(struct r600_bc_alu));  	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); @@ -1065,14 +1020,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,  	if (r)  		return r; -	if (ctx->bc->chiprev == CHIPREV_R600) { -		lit_vals[0] = fui(3.1415926535897f * 2.0f); -		lit_vals[1] = fui(-3.1415926535897f); -	} else { -		lit_vals[0] = fui(1.0f); -		lit_vals[1] = fui(-0.5f); -	} -  	memset(&alu, 0, sizeof(struct r600_bc_alu));  	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);  	alu.is_op3 = 1; @@ -1088,13 +1035,20 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,  	alu.src[1].chan = 0;  	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;  	alu.src[2].chan = 1; + +	if (ctx->bc->chiprev == CHIPREV_R600) { +		alu.src[1].value = (uint32_t *)&double_pi; +		alu.src[2].value = (uint32_t *)&neg_pi; +	} else { +		alu.src[1].sel = V_SQ_ALU_SRC_1; +		alu.src[2].sel = V_SQ_ALU_SRC_0_5; +		alu.src[2].neg = 1; +	} +  	alu.last = 1;  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc, lit_vals); -	if (r) -		return r;  	return 0;  } @@ -1210,10 +1164,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* dst.w = 1.0; */ @@ -1234,10 +1184,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	return 0; @@ -1273,9 +1219,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)  		if (r)  			return r;  	} -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	/* kill must be last in ALU */  	ctx->bc->force_add_cf = 1; @@ -1338,10 +1281,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r; -  	if (inst->Dst[0].Register.WriteMask & (1 << 2))  	{  		int chan; @@ -1360,10 +1299,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		chan = alu.dst.chan;  		sel = alu.dst.sel; @@ -1386,9 +1321,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  		/* dst.z = exp(tmp.x) */  		memset(&alu, 0, sizeof(struct r600_bc_alu));  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1432,9 +1364,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	/* replicate result */  	return tgsi_helper_tempx_replicate(ctx);  } @@ -1483,9 +1412,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	/* replicate result */  	return tgsi_helper_tempx_replicate(ctx);  } @@ -1509,9 +1435,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc,ctx->value); -	if (r) -		return r;  	/* b * LOG2(a) */  	memset(&alu, 0, sizeof(struct r600_bc_alu));  	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -1526,9 +1449,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc,ctx->value); -	if (r) -		return r;  	/* POW(a,b) = EXP2(b * LOG2(a))*/  	memset(&alu, 0, sizeof(struct r600_bc_alu));  	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1539,9 +1459,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)  	r = r600_bc_add_alu(ctx->bc, &alu);  	if (r)  		return r; -	r = r600_bc_add_literal(ctx->bc,ctx->value); -	if (r) -		return r;  	return tgsi_helper_tempx_replicate(ctx);  } @@ -1581,9 +1498,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)  		if (r)  			return r;  	} -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	/* dst = (-tmp > 0 ? -1 : tmp) */  	for (i = 0; i < 4; i++) { @@ -1618,9 +1532,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru  	struct r600_bc_alu alu;  	int i, r; -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	for (i = 0; i < 4; i++) {  		memset(&alu, 0, sizeof(struct r600_bc_alu));  		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -1749,6 +1660,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)  static int tgsi_tex(struct r600_shader_ctx *ctx)  { +	static float one_point_five = 1.5f;  	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;  	struct r600_bc_tex tex;  	struct r600_bc_alu alu; @@ -1758,7 +1670,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)  	boolean src_not_temp =  		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&  		inst->Src[0].Register.File != TGSI_FILE_INPUT; -	uint32_t lit_vals[4];  	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1887,6 +1798,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)  		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;  		alu.src[2].chan = 0; +		alu.src[2].value = (u32*)&one_point_five;  		alu.dst.sel = ctx->temp_reg;  		alu.dst.chan = 0; @@ -1907,6 +1819,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)  		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;  		alu.src[2].chan = 0; +		alu.src[2].value = (u32*)&one_point_five;  		alu.dst.sel = ctx->temp_reg;  		alu.dst.chan = 1; @@ -1917,11 +1830,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		lit_vals[0] = fui(1.5f); - -		r = r600_bc_add_literal(ctx->bc, lit_vals); -		if (r) -			return r;  		src_not_temp = FALSE;  		src_gpr = ctx->temp_reg;  	} @@ -2055,9 +1963,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)  		if (r)  			return r;  	} -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	/* (1 - src0) * src2 */  	for (i = 0; i < lasti + 1; i++) { @@ -2080,9 +1985,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)  		if (r)  			return r;  	} -	r = r600_bc_add_literal(ctx->bc, ctx->value); -	if (r) -		return r;  	/* src0 * src1 + (1 - src0) * src2 */  	for (i = 0; i < lasti + 1; i++) { @@ -2223,10 +2125,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	for (i = 0; i < 4; i++) { @@ -2284,10 +2182,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	if (use_temp)  		return tgsi_helper_copy(ctx, inst); @@ -2320,10 +2214,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);  		alu.src[0].sel = ctx->temp_reg;  		alu.src[0].chan = 0; @@ -2335,10 +2225,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* result.y = tmp - floor(tmp); */ @@ -2364,9 +2250,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* result.z = RoughApprox2ToX(tmp);*/ @@ -2387,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* result.w = 1.0;*/ @@ -2407,9 +2287,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	return tgsi_helper_copy(ctx, inst);  } @@ -2439,10 +2316,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);  		alu.src[0].sel = ctx->temp_reg;  		alu.src[0].chan = 0; @@ -2455,10 +2328,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* result.y = src.x / (2 ^ floor(log2(src.x))); */ @@ -2481,10 +2350,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		memset(&alu, 0, sizeof(struct r600_bc_alu));  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); @@ -2500,10 +2365,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		memset(&alu, 0, sizeof(struct r600_bc_alu));  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2519,10 +2380,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		memset(&alu, 0, sizeof(struct r600_bc_alu));  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2538,10 +2395,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		if (r)  			return r; -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r; -  		memset(&alu, 0, sizeof(struct r600_bc_alu));  		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2563,10 +2416,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* result.z = log2(src);*/ @@ -2588,10 +2437,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	/* result.w = 1.0; */ @@ -2610,10 +2455,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)  		r = r600_bc_add_alu(ctx->bc, &alu);  		if (r)  			return r; - -		r = r600_bc_add_literal(ctx->bc, ctx->value); -		if (r) -			return r;  	}  	return tgsi_helper_copy(ctx, inst); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 35b0331525..935dd6fe3a 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -47,6 +47,6 @@ struct r600_shader {  	boolean			uses_kill;  }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);  #endif diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 3eb6fb50ca..a7f2f54736 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -29,8 +29,6 @@  int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)  { -	unsigned i; -  	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |  		S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |  		S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | @@ -67,13 +65,5 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)  			                S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |  		 	                S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);  	} -	if (alu->last) { -		if (alu->nliteral && !alu->literal_added) { -			R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); -		} -		for (i = 0; i < alu->nliteral; i++) { -			bc->bytecode[id++] = alu->value[i]; -		} -	}  	return 0;  } | 
