diff options
| author | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2009-09-15 11:44:19 +0200 | 
|---|---|---|
| committer | Christoph Bumiller <e0425955@student.tuwien.ac.at> | 2009-09-15 12:13:22 +0200 | 
| commit | 234d82ed632f8b53eec805383874de4fd00eff2a (patch) | |
| tree | e452c547388cb15aa84126e4dacda3981e76aaec | |
| parent | 2da8608ad795d29fe7a908eaee3d6dd33e829f3e (diff) | |
nv50: add nv50_tgsi_insn to handle swizzles safely
| -rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 147 | 
1 files changed, 128 insertions, 19 deletions
| diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index a92ceee6ab..10552ee328 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -112,6 +112,10 @@ struct nv50_pc {  	struct nv50_reg *temp_temp[16];  	unsigned temp_temp_nr; +	/* broadcast and destination replacement regs */ +	struct nv50_reg *r_brdc; +	struct nv50_reg *r_dst[4]; +  	unsigned interp_mode[32];  	/* perspective interpolation registers */  	struct nv50_reg *iv_p; @@ -890,6 +894,12 @@ emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)  	emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);  } +static INLINE void +emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); +} +  static void  emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,  	 struct nv50_reg **src) @@ -1389,26 +1399,24 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)  }  static boolean -nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +nv50_program_tx_insn(struct nv50_pc *pc, +		     const struct tgsi_full_instruction *inst)  { -	const struct tgsi_full_instruction *inst = &tok->FullInstruction;  	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp;  	unsigned mask, sat, unit; -	boolean assimilate = FALSE;  	int i, c;  	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;  	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; +	memset(src, 0, sizeof(src)); +  	for (c = 0; c < 4; c++) { -		if (mask & (1 << c)) +		if ((mask & (1 << c)) && !pc->r_dst[c])  			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);  		else -			dst[c] = NULL; -		rdst[c] = NULL; -		src[0][c] = NULL; -		src[1][c] = NULL; -		src[2][c] = NULL; +			dst[c] = pc->r_dst[c]; +		rdst[c] = dst[c];  	}  	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -1429,6 +1437,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  	if (sat) {  		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) +				continue;  			rdst[c] = dst[c];  			dst[c] = temp_temp(pc);  		} @@ -1700,13 +1710,11 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  		for (c = 0; c < 4; c++) {  			if (!(mask & (1 << c)))  				continue; -			emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, -				 CVT_F32_F32); +			/* in this case we saturate later */ +			if (dst[c]->type == P_TEMP && dst[c]->index < 0) +				continue; +			emit_sat(pc, rdst[c], dst[c]);  		} -	} else if (assimilate) { -		for (c = 0; c < 4; c++) -			if (rdst[c]) -				assimilate_temp(pc, rdst[c], dst[c]);  	}  	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -1715,9 +1723,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)  				continue;  			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)  				FREE(src[i][c]); -			else -			if (src[i][c]->acc == pc->insn_cur) -				release_hw(pc, src[i][c]);  		}  	} @@ -1856,6 +1861,110 @@ tgsi_broadcast_dst(struct nv50_pc *pc,  	return NULL;  } +/* Scan source swizzles and return a bitmask indicating dst regs that + * also occur among the src regs, and fill rdep for nv50_revdep_reoder. + */ +static unsigned +nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, +		       unsigned rdep[4]) +{ +	const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; +	const struct tgsi_full_src_register *fs; +	unsigned i, deqs = 0; + +	for (i = 0; i < 4; ++i) +		rdep[i] = 0; + +	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { +		unsigned chn, mask = nv50_tgsi_src_mask(insn, i); +		boolean neg_supp = negate_supported(insn, i); + +		fs = &insn->FullSrcRegisters[i]; +		if (fs->SrcRegister.File != fd->DstRegister.File || +		    fs->SrcRegister.Index != fd->DstRegister.Index) +			continue; + +		for (chn = 0; chn < 4; ++chn) { +			unsigned s, c; + +			if (!(mask & (1 << chn))) /* src is not read */ +				continue; +			c = tgsi_util_get_full_src_register_extswizzle(fs, chn); +			s = tgsi_util_get_full_src_register_sign_mode(fs, chn); + +			if (c > TGSI_EXTSWIZZLE_W || +			    !(fd->DstRegister.WriteMask & (1 << c))) +				continue; + +			/* no danger if src is copied to TEMP first */ +			if ((s != TGSI_UTIL_SIGN_KEEP) && +			    (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) +				continue; + +			rdep[c] |= nv50_tgsi_dst_revdep( +				insn->Instruction.Opcode, i, chn); +			deqs |= (1 << c); +		} +	} + +	return deqs; +} + +static boolean +nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ +	struct tgsi_full_instruction insn = tok->FullInstruction; +	const struct tgsi_full_dst_register *fd; +	unsigned i, deqs, rdep[4], m[4]; + +	fd = &tok->FullInstruction.FullDstRegisters[0]; +	deqs = nv50_tgsi_scan_swizzle(&insn, rdep); + +	if (is_scalar_op(insn.Instruction.Opcode)) { +		pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); +		if (!pc->r_brdc) +			pc->r_brdc = temp_temp(pc); +		return nv50_program_tx_insn(pc, &insn); +	} +	pc->r_brdc = NULL; + +	if (!deqs) +		return nv50_program_tx_insn(pc, &insn); + +	deqs = nv50_revdep_reorder(m, rdep); + +	for (i = 0; i < 4; ++i) { +		assert(pc->r_dst[m[i]] == NULL); + +		insn.FullDstRegisters[0].DstRegister.WriteMask = +			fd->DstRegister.WriteMask & (1 << m[i]); + +		if (!insn.FullDstRegisters[0].DstRegister.WriteMask) +			continue; + +		if (deqs & (1 << i)) +			pc->r_dst[m[i]] = alloc_temp(pc, NULL); + +		if (!nv50_program_tx_insn(pc, &insn)) +			return FALSE; +	} + +	for (i = 0; i < 4; i++) { +		struct nv50_reg *reg = pc->r_dst[i]; +		if (!reg) +			continue; +		pc->r_dst[i] = NULL; + +		if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE) +			emit_sat(pc, tgsi_dst(pc, i, fd), reg); +		else +			emit_mov(pc, tgsi_dst(pc, i, fd), reg); +		free_temp(pc, reg); +	} + +	return TRUE; +} +  static unsigned  load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,  	       int *aid, int *p_oid) @@ -2255,7 +2364,7 @@ nv50_program_tx(struct nv50_program *p)  		switch (tok->Token.Type) {  		case TGSI_TOKEN_TYPE_INSTRUCTION:  			++pc->insn_cur; -			ret = nv50_program_tx_insn(pc, tok); +			ret = nv50_tgsi_insn(pc, tok);  			if (ret == FALSE)  				goto out_err;  			break; | 
