diff options
| -rw-r--r-- | src/gallium/drivers/nv30/nv30_fragprog.c | 172 | ||||
| -rw-r--r-- | src/gallium/drivers/nv30/nv30_shader.h | 323 | ||||
| -rw-r--r-- | src/gallium/drivers/nv30/nv30_vertprog.c | 134 | ||||
| -rw-r--r-- | src/gallium/drivers/nv40/nv40_draw.c | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/nv40/nv40_fragprog.c | 160 | ||||
| -rw-r--r-- | src/gallium/drivers/nv40/nv40_shader.h | 380 | ||||
| -rw-r--r-- | src/gallium/drivers/nv40/nv40_vertprog.c | 166 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_shader.h | 407 | 
8 files changed, 726 insertions, 1018 deletions
| diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index ae246ffd64..4ce16b8f0e 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -19,14 +19,14 @@  #define MASK_Z 4  #define MASK_W 8  #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV30_FP_OP_COND_TR -#include "nv30_shader.h" +#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X +#define DEF_CTEST NVFX_FP_OP_COND_TR +#include "nvfx_shader.h" -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) -#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)  #define MAX_CONSTS 128  #define MAX_IMM 32 @@ -50,21 +50,21 @@ struct nv30_fpc {  	} consts[MAX_CONSTS];  	int nr_consts; -	struct nv30_sreg imm[MAX_IMM]; +	struct nvfx_sreg imm[MAX_IMM];  	unsigned nr_imm;  }; -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg  temp(struct nv30_fpc *fpc)  {  	int idx;  	idx  = fpc->temp_temp_count++;  	idx += fpc->high_temp + 1; -	return nv30_sr(NV30SR_TEMP, idx); +	return nvfx_sr(NVFXSR_TEMP, idx);  } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg  constant(struct nv30_fpc *fpc, int pipe, float vals[4])  {  	int idx; @@ -76,14 +76,14 @@ constant(struct nv30_fpc *fpc, int pipe, float vals[4])  	fpc->consts[idx].pipe = pipe;  	if (pipe == -1)  		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); -	return nv30_sr(NV30SR_CONST, idx); +	return nvfx_sr(NVFXSR_CONST, idx);  }  #define arith(cc,s,o,d,m,s0,s1,s2) \ -	nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ +	nv30_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \  			(d), (m), (s0), (s1), (s2))  #define 
tex(cc,s,o,u,d,m,s0,s1,s2) \ -	nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ +	nv30_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \  		    (d), (m), (s0), none, none)  static void @@ -96,25 +96,25 @@ grow_insns(struct nv30_fpc *fpc, int size)  }  static void -emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +emit_src(struct nv30_fpc *fpc, int pos, struct nvfx_sreg src)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	uint32_t *hw = &fp->insn[fpc->inst_offset];  	uint32_t sr = 0;  	switch (src.type) { -	case NV30SR_INPUT: -		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); -		hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); +	case NVFXSR_INPUT: +		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); +		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);  		break; -	case NV30SR_OUTPUT: -		sr |= NV30_FP_REG_SRC_HALF; +	case NVFXSR_OUTPUT: +		sr |= NVFX_FP_REG_SRC_HALF;  		/* fall-through */ -	case NV30SR_TEMP: -		sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); -		sr |= (src.index << NV30_FP_REG_SRC_SHIFT); +	case NVFXSR_TEMP: +		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); +		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);  		break; -	case NV30SR_CONST: +	case NVFXSR_CONST:  		grow_insns(fpc, 4);  		hw = &fp->insn[fpc->inst_offset];  		if (fpc->consts[src.index].pipe >= 0) { @@ -132,61 +132,61 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)  				sizeof(uint32_t) * 4);  		} -		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); +		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);  		break; -	case NV30SR_NONE: -		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); +	case NVFXSR_NONE: +		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);  		break;  	default:  		assert(0);  	}  	if (src.negate) -		sr |= NV30_FP_REG_NEGATE; +		sr |= NVFX_FP_REG_NEGATE;  	if (src.abs)  		hw[1] |= (1 << (29 + pos)); -	sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | -	       (src.swz[1] << 
NV30_FP_REG_SWZ_Y_SHIFT) | -	       (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | -	       (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); +	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | +	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | +	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | +	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));  	hw[pos + 1] |= sr;  }  static void -emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +emit_dst(struct nv30_fpc *fpc, struct nvfx_sreg dst)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	uint32_t *hw = &fp->insn[fpc->inst_offset];  	switch (dst.type) { -	case NV30SR_TEMP: +	case NVFXSR_TEMP:  		if (fpc->num_regs < (dst.index + 1))  			fpc->num_regs = dst.index + 1;  		break; -	case NV30SR_OUTPUT: +	case NVFXSR_OUTPUT:  		if (dst.index == 1) {  			fp->fp_control |= 0xe;  		} else { -			hw[0] |= NV30_FP_OP_OUT_REG_HALF; +			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;  		}  		break; -	case NV30SR_NONE: +	case NVFXSR_NONE:  		hw[0] |= (1 << 30);  		break;  	default:  		assert(0);  	} -	hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); +	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);  }  static void  nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, -	      struct nv30_sreg dst, int mask, -	      struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +	      struct nvfx_sreg dst, int mask, +	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	uint32_t *hw; @@ -196,22 +196,22 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,  	hw = &fp->insn[fpc->inst_offset];  	memset(hw, 0, sizeof(uint32_t) * 4); -	if (op == NV30_FP_OP_OPCODE_KIL) +	if (op == NVFX_FP_OP_OPCODE_KIL)  		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; -	hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); -	hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); -	hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); +	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); +	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); +	hw[2] 
|= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);  	if (sat) -		hw[0] |= NV30_FP_OP_OUT_SAT; +		hw[0] |= NVFX_FP_OP_OUT_SAT;  	if (dst.cc_update) -		hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; -	hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); -	hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | -		  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | -		  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | -		  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); +		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; +	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); +	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | +		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | +		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | +		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));  	emit_dst(fpc, dst);  	emit_src(fpc, 0, s0); @@ -221,25 +221,25 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op,  static void  nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, -	    struct nv30_sreg dst, int mask, -	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +	    struct nvfx_sreg dst, int mask, +	    struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); -	fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); +	fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);  	fp->samplers |= (1 << unit);  } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg  tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)  { -	struct nv30_sreg src; +	struct nvfx_sreg src;  	switch (fsrc->Register.File) {  	case TGSI_FILE_INPUT: -		src = nv30_sr(NV30SR_INPUT, +		src = nvfx_sr(NVFXSR_INPUT,  			      fpc->attrib_map[fsrc->Register.Index]);  		break;  	case TGSI_FILE_CONSTANT: @@ -250,7 +250,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)  		src = fpc->imm[fsrc->Register.Index];  		
break;  	case TGSI_FILE_TEMPORARY: -		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); +		src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index + 1);  		if (fpc->high_temp < src.index)  			fpc->high_temp = src.index;  		break; @@ -259,9 +259,9 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)  	 */  	case TGSI_FILE_OUTPUT:  		if (fsrc->Register.Index == fpc->colour_id) -			return nv30_sr(NV30SR_OUTPUT, 0); +			return nvfx_sr(NVFXSR_OUTPUT, 0);  		else -			return nv30_sr(NV30SR_OUTPUT, 1); +			return nvfx_sr(NVFXSR_OUTPUT, 1);  		break;  	default:  		NOUVEAU_ERR("bad src file\n"); @@ -277,27 +277,27 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)  	return src;  } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg  tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {  	int idx;  	switch (fdst->Register.File) {  	case TGSI_FILE_OUTPUT:  		if (fdst->Register.Index == fpc->colour_id) -			return nv30_sr(NV30SR_OUTPUT, 0); +			return nvfx_sr(NVFXSR_OUTPUT, 0);  		else -			return nv30_sr(NV30SR_OUTPUT, 1); +			return nvfx_sr(NVFXSR_OUTPUT, 1);  		break;  	case TGSI_FILE_TEMPORARY:  		idx = fdst->Register.Index + 1;  		if (fpc->high_temp < idx)  			fpc->high_temp = idx; -		return nv30_sr(NV30SR_TEMP, idx); +		return nvfx_sr(NVFXSR_TEMP, idx);  	case TGSI_FILE_NULL: -		return nv30_sr(NV30SR_NONE, 0); +		return nvfx_sr(NVFXSR_NONE, 0);  	default:  		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); -		return nv30_sr(NV30SR_NONE, 0); +		return nvfx_sr(NVFXSR_NONE, 0);  	}  } @@ -315,10 +315,10 @@ tgsi_mask(uint tgsi)  static boolean  src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, -	       struct nv30_sreg *src) +	       struct nvfx_sreg *src)  { -	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); -	struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); +	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); +	struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc);  
	uint mask = 0;  	uint c; @@ -350,8 +350,8 @@ static boolean  nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,  				const struct tgsi_full_instruction *finst)  { -	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); -	struct nv30_sreg src[3], dst, tmp; +	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); +	struct nvfx_sreg src[3], dst, tmp;  	int mask, sat, unit = 0;  	int ai = -1, ci = -1;  	int i; @@ -435,12 +435,12 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,  		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_CMP: -		tmp = nv30_sr(NV30SR_NONE, 0); +		tmp = nvfx_sr(NVFXSR_NONE, 0);  		tmp.cc_update = 1;  		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); -		dst.cc_test = NV30_VP_INST_COND_GE; +		dst.cc_test = NVFX_VP_INST_COND_GE;  		arith(fpc, sat, MOV, dst, mask, src[2], none, none); -		dst.cc_test = NV30_VP_INST_COND_LT; +		dst.cc_test = NVFX_VP_INST_COND_LT;  		arith(fpc, sat, MOV, dst, mask, src[1], none, none);  		break;  	case TGSI_OPCODE_COS: @@ -474,10 +474,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,  		arith(fpc, 0, KIL, none, 0, none, none, none);  		break;  	case TGSI_OPCODE_KIL: -		dst = nv30_sr(NV30SR_NONE, 0); +		dst = nvfx_sr(NVFXSR_NONE, 0);  		dst.cc_update = 1;  		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); -		dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; +		dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT;  		arith(fpc, 0, KIL, dst, 0, none, none, none);  		break;  	case TGSI_OPCODE_LG2: @@ -485,7 +485,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,  		break;  //	case TGSI_OPCODE_LIT:  	case TGSI_OPCODE_LRP: -		arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); +		arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);  		break;  	case TGSI_OPCODE_MAD:  		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); @@ -503,7 +503,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,  		arith(fpc, sat, MUL, dst, mask, 
src[0], src[1], none);  		break;  	case TGSI_OPCODE_POW: -		arith(fpc, sat, POW, dst, mask, src[0], src[1], none); +		arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_RCP:  		arith(fpc, sat, RCP, dst, mask, src[0], none, none); @@ -512,10 +512,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,  		assert(0);  		break;  	case TGSI_OPCODE_RFL: -		arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); +		arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_RSQ: -		arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); +		arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);  		break;  	case TGSI_OPCODE_SCS:  		/* avoid overwriting the source */ @@ -590,25 +590,25 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,  	switch (fdec->Semantic.Name) {  	case TGSI_SEMANTIC_POSITION: -		hw = NV30_FP_OP_INPUT_SRC_POSITION; +		hw = NVFX_FP_OP_INPUT_SRC_POSITION;  		break;  	case TGSI_SEMANTIC_COLOR:  		if (fdec->Semantic.Index == 0) { -			hw = NV30_FP_OP_INPUT_SRC_COL0; +			hw = NVFX_FP_OP_INPUT_SRC_COL0;  		} else  		if (fdec->Semantic.Index == 1) { -			hw = NV30_FP_OP_INPUT_SRC_COL1; +			hw = NVFX_FP_OP_INPUT_SRC_COL1;  		} else {  			NOUVEAU_ERR("bad colour semantic index\n");  			return FALSE;  		}  		break;  	case TGSI_SEMANTIC_FOG: -		hw = NV30_FP_OP_INPUT_SRC_FOGC; +		hw = NVFX_FP_OP_INPUT_SRC_FOGC;  		break;  	case TGSI_SEMANTIC_GENERIC:  		if (fdec->Semantic.Index <= 7) { -			hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. +			hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.  						     
Index);  		} else {  			NOUVEAU_ERR("bad generic semantic index\n"); @@ -702,7 +702,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)  	tgsi_parse_free(&p);  	/*if (++high_temp) { -		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); +		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));  		for (i = 0; i < high_temp; i++)  			fpc->r_temp[i] = temp(fpc);  		fpc->r_temps_discard = 0; diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h index dd3a36f78f..f19efb5aa4 100644 --- a/src/gallium/drivers/nv30/nv30_shader.h +++ b/src/gallium/drivers/nv30/nv30_shader.h @@ -72,14 +72,6 @@  #define NV30_VP_INST_COND_TEST_ENABLE        (1<<14)  #define NV30_VP_INST_COND_SHIFT          11  #define NV30_VP_INST_COND_MASK          (0x07 << 11) -#  define NV30_VP_INST_COND_FL  0 /* guess */   -#  define NV30_VP_INST_COND_LT  1   -#  define NV30_VP_INST_COND_EQ  2 -#  define NV30_VP_INST_COND_LE  3 -#  define NV30_VP_INST_COND_GT  4 -#  define NV30_VP_INST_COND_NE  5 -#  define NV30_VP_INST_COND_GE  6 -#  define NV30_VP_INST_COND_TR  7 /* guess */  #define NV30_VP_INST_COND_SWZ_X_SHIFT        9  #define NV30_VP_INST_COND_SWZ_X_MASK        (0x03 <<  9)  #define NV30_VP_INST_COND_SWZ_Y_SHIFT        7 @@ -98,59 +90,12 @@  /* DWORD 1 */  #define NV30_VP_INST_SCA_OPCODEL_SHIFT        28  #define NV30_VP_INST_SCA_OPCODEL_MASK        (0x0F << 28) -#  define NV30_VP_INST_OP_NOP  0x00 -#  define NV30_VP_INST_OP_RCP  0x02 -#  define NV30_VP_INST_OP_RCC  0x03 -#  define NV30_VP_INST_OP_RSQ  0x04 -#  define NV30_VP_INST_OP_EXP  0x05 -#  define NV30_VP_INST_OP_LOG  0x06 -#  define NV30_VP_INST_OP_LIT  0x07 -#  define NV30_VP_INST_OP_BRA  0x09 -#  define NV30_VP_INST_OP_CAL  0x0B -#  define NV30_VP_INST_OP_RET  0x0C -#  define NV30_VP_INST_OP_LG2  0x0D -#  define NV30_VP_INST_OP_EX2  0x0E -#  define NV30_VP_INST_OP_SIN  0x0F -#  define NV30_VP_INST_OP_COS  0x10  #define NV30_VP_INST_VEC_OPCODE_SHIFT        23  #define 
NV30_VP_INST_VEC_OPCODE_MASK        (0x1F << 23) -#  define NV30_VP_INST_OP_NOPV  0x00 -#  define NV30_VP_INST_OP_MOV  0x01 -#  define NV30_VP_INST_OP_MUL  0x02 -#  define NV30_VP_INST_OP_ADD  0x03 -#  define NV30_VP_INST_OP_MAD  0x04 -#  define NV30_VP_INST_OP_DP3  0x05 -#  define NV30_VP_INST_OP_DP4  0x07 -#  define NV30_VP_INST_OP_DPH  0x06 -#  define NV30_VP_INST_OP_DST  0x08 -#  define NV30_VP_INST_OP_MIN  0x09 -#  define NV30_VP_INST_OP_MAX  0x0A -#  define NV30_VP_INST_OP_SLT  0x0B -#  define NV30_VP_INST_OP_SGE  0x0C -#  define NV30_VP_INST_OP_ARL  0x0D -#  define NV30_VP_INST_OP_FRC  0x0E -#  define NV30_VP_INST_OP_FLR  0x0F -#  define NV30_VP_INST_OP_SEQ  0x10 -#  define NV30_VP_INST_OP_SFL  0x11 -#  define NV30_VP_INST_OP_SGT  0x12 -#  define NV30_VP_INST_OP_SLE  0x13 -#  define NV30_VP_INST_OP_SNE  0x14 -#  define NV30_VP_INST_OP_STR  0x15 -#  define NV30_VP_INST_OP_SSG  0x16 -#  define NV30_VP_INST_OP_ARR  0x17 -#  define NV30_VP_INST_OP_ARA  0x18  #define NV30_VP_INST_CONST_SRC_SHIFT        14  #define NV30_VP_INST_CONST_SRC_MASK        (0xFF << 14)  #define NV30_VP_INST_INPUT_SRC_SHIFT        9    /*NV20*/  #define NV30_VP_INST_INPUT_SRC_MASK        (0x0F <<  9)  /*NV20*/ -#  define NV30_VP_INST_IN_POS  0    /* These seem to match the bindings specified in */ -#  define NV30_VP_INST_IN_WEIGHT  1    /* the ARB_v_p spec (2.14.3.1) */ -#  define NV30_VP_INST_IN_NORMAL  2     -#  define NV30_VP_INST_IN_COL0  3    /* Should probably confirm them all though */ -#  define NV30_VP_INST_IN_COL1  4 -#  define NV30_VP_INST_IN_FOGC  5 -#  define NV30_VP_INST_IN_TC0  8 -#  define NV30_VP_INST_IN_TC(n)  (8+n)  #define NV30_VP_INST_SRC0H_SHIFT        0    /*NV20*/  #define NV30_VP_INST_SRC0H_MASK          (0x1FF << 0)  /*NV20*/ @@ -190,8 +135,6 @@  #  define NV30_VP_INST_DEST_PSZ   6  #  define NV30_VP_INST_DEST_TC(n)  (8+n) -#define NV30_VP_INST_LAST                           (1 << 0) -  /* Useful to split the source selection regs into their pieces */  #define 
NV30_VP_SRC0_HIGH_SHIFT                                                6  #define NV30_VP_SRC0_HIGH_MASK                                        0x00007FC0 @@ -221,270 +164,6 @@  #define NV30_VP_SRC_REG_TYPE_INPUT  2  #define NV30_VP_SRC_REG_TYPE_CONST  3 /* guess */ -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - *   0 - Opcode, output reg/mask, ATTRIB source - *   1 - Source 0 - *   2 - Source 1 - *   3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - *     result.color == R0.xyzw - *     result.depth == R1.z - * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 - * otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - *  - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords.  As such instructions such as: - *  - *     ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO - * is implemented simply by not writing to the relevant components of the destination. 
- * - * Conditional execution - *   TODO - *  - * Non-native instructions: - *   LIT - *   LRP - MAD+MAD - *   SUB - ADD, negate second source - *   RSQ - LG2 + EX2 - *   POW - LG2 + MUL + EX2 - *   SCS - COS + SIN - *   XPD - */ - -//== Opcode / Destination selection == -#define NV30_FP_OP_PROGRAM_END          (1 << 0) -#define NV30_FP_OP_OUT_REG_SHIFT        1 -#define NV30_FP_OP_OUT_REG_MASK          (31 << 1)  /* uncertain */ -/* Needs to be set when writing outputs to get expected result.. */ -#define NV30_FP_OP_OUT_REG_HALF          (1 << 7) -#define NV30_FP_OP_COND_WRITE_ENABLE        (1 << 8) -#define NV30_FP_OP_OUTMASK_SHIFT        9 -#define NV30_FP_OP_OUTMASK_MASK          (0xF << 9) -#  define NV30_FP_OP_OUT_X  (1<<9) -#  define NV30_FP_OP_OUT_Y  (1<<10) -#  define NV30_FP_OP_OUT_Z  (1<<11) -#  define NV30_FP_OP_OUT_W  (1<<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV30_FP_OP_INPUT_SRC_SHIFT        13 -#define NV30_FP_OP_INPUT_SRC_MASK        (15 << 13) -#  define NV30_FP_OP_INPUT_SRC_POSITION  0x0 -#  define NV30_FP_OP_INPUT_SRC_COL0  0x1 -#  define NV30_FP_OP_INPUT_SRC_COL1  0x2 -#  define NV30_FP_OP_INPUT_SRC_FOGC  0x3 -#  define NV30_FP_OP_INPUT_SRC_TC0    0x4 -#  define NV30_FP_OP_INPUT_SRC_TC(n)  (0x4 + n) -#define NV30_FP_OP_TEX_UNIT_SHIFT        17 -#define NV30_FP_OP_TEX_UNIT_MASK        (0xF << 17) /* guess */ -#define NV30_FP_OP_PRECISION_SHIFT        22 -#define NV30_FP_OP_PRECISION_MASK        (3 << 22) -#   define NV30_FP_PRECISION_FP32  0 -#   define NV30_FP_PRECISION_FP16  1 -#   define NV30_FP_PRECISION_FX12  2 -#define NV30_FP_OP_OPCODE_SHIFT          24 -#define NV30_FP_OP_OPCODE_MASK          (0x3F << 24) -#  define NV30_FP_OP_OPCODE_NOP  0x00 -#  define NV30_FP_OP_OPCODE_MOV  0x01 -#  define NV30_FP_OP_OPCODE_MUL  0x02 -#  define NV30_FP_OP_OPCODE_ADD  0x03 -#  define NV30_FP_OP_OPCODE_MAD  0x04 -#  define NV30_FP_OP_OPCODE_DP3  0x05 -#  
define NV30_FP_OP_OPCODE_DP4  0x06 -#  define NV30_FP_OP_OPCODE_DST  0x07 -#  define NV30_FP_OP_OPCODE_MIN  0x08 -#  define NV30_FP_OP_OPCODE_MAX  0x09 -#  define NV30_FP_OP_OPCODE_SLT  0x0A -#  define NV30_FP_OP_OPCODE_SGE  0x0B -#  define NV30_FP_OP_OPCODE_SLE  0x0C -#  define NV30_FP_OP_OPCODE_SGT  0x0D -#  define NV30_FP_OP_OPCODE_SNE  0x0E -#  define NV30_FP_OP_OPCODE_SEQ  0x0F -#  define NV30_FP_OP_OPCODE_FRC  0x10 -#  define NV30_FP_OP_OPCODE_FLR  0x11 -#  define NV30_FP_OP_OPCODE_KIL  0x12 -#  define NV30_FP_OP_OPCODE_PK4B   0x13 -#  define NV30_FP_OP_OPCODE_UP4B   0x14 -#  define NV30_FP_OP_OPCODE_DDX  0x15 /* can only write XY */ -#  define NV30_FP_OP_OPCODE_DDY  0x16 /* can only write XY */ -#  define NV30_FP_OP_OPCODE_TEX  0x17 -#  define NV30_FP_OP_OPCODE_TXP  0x18 -#  define NV30_FP_OP_OPCODE_TXD  0x19 -#  define NV30_FP_OP_OPCODE_RCP  0x1A -#  define NV30_FP_OP_OPCODE_RSQ  0x1B -#  define NV30_FP_OP_OPCODE_EX2  0x1C -#  define NV30_FP_OP_OPCODE_LG2  0x1D -#  define NV30_FP_OP_OPCODE_LIT  0x1E -#  define NV30_FP_OP_OPCODE_LRP  0x1F -#  define NV30_FP_OP_OPCODE_STR  0x20  -#  define NV30_FP_OP_OPCODE_SFL  0x21 -#  define NV30_FP_OP_OPCODE_COS  0x22 -#  define NV30_FP_OP_OPCODE_SIN  0x23 -#  define NV30_FP_OP_OPCODE_PK2H   0x24 -#  define NV30_FP_OP_OPCODE_UP2H   0x25 -#  define NV30_FP_OP_OPCODE_POW  0x26 -#  define NV30_FP_OP_OPCODE_PK4UB  0x27 -#  define NV30_FP_OP_OPCODE_UP4UB  0x28 -#  define NV30_FP_OP_OPCODE_PK2US  0x29 -#  define NV30_FP_OP_OPCODE_UP2US  0x2A -#  define NV30_FP_OP_OPCODE_DP2A   0x2E -#  define NV30_FP_OP_OPCODE_TXB  0x31 -#  define NV30_FP_OP_OPCODE_RFL  0x36 -#  define NV30_FP_OP_OPCODE_DIV  0x3A -#define NV30_FP_OP_OUT_SAT          (1 << 31) - -/* high order bits of SRC0 */ -#define NV30_FP_OP_OUT_ABS          (1 << 29) -#define NV30_FP_OP_COND_SWZ_W_SHIFT        27 -#define NV30_FP_OP_COND_SWZ_W_MASK        (3 << 27) -#define NV30_FP_OP_COND_SWZ_Z_SHIFT        25 -#define NV30_FP_OP_COND_SWZ_Z_MASK        (3 << 25) -#define 
NV30_FP_OP_COND_SWZ_Y_SHIFT        23 -#define NV30_FP_OP_COND_SWZ_Y_MASK        (3 << 23) -#define NV30_FP_OP_COND_SWZ_X_SHIFT        21 -#define NV30_FP_OP_COND_SWZ_X_MASK        (3 << 21) -#define NV30_FP_OP_COND_SWZ_ALL_SHIFT        21 -#define NV30_FP_OP_COND_SWZ_ALL_MASK        (0xFF << 21) -#define NV30_FP_OP_COND_SHIFT          18 -#define NV30_FP_OP_COND_MASK          (0x07 << 18) -#  define NV30_FP_OP_COND_FL  0 -#  define NV30_FP_OP_COND_LT  1 -#  define NV30_FP_OP_COND_EQ  2 -#  define NV30_FP_OP_COND_LE  3 -#  define NV30_FP_OP_COND_GT  4 -#  define NV30_FP_OP_COND_NE  5 -#  define NV30_FP_OP_COND_GE  6 -#  define NV30_FP_OP_COND_TR  7 - -/* high order bits of SRC1 */ -#define NV30_FP_OP_DST_SCALE_SHIFT        28 -#define NV30_FP_OP_DST_SCALE_MASK        (3 << 28) -#define NV30_FP_OP_DST_SCALE_1X                                                0 -#define NV30_FP_OP_DST_SCALE_2X                                                1 -#define NV30_FP_OP_DST_SCALE_4X                                                2 -#define NV30_FP_OP_DST_SCALE_8X                                                3 -#define NV30_FP_OP_DST_SCALE_INV_2X                                            5 -#define NV30_FP_OP_DST_SCALE_INV_4X                                            6 -#define NV30_FP_OP_DST_SCALE_INV_8X                                            7 - - -/* high order bits of SRC2 */ -#define NV30_FP_OP_INDEX_INPUT          (1 << 30) - -//== Register selection == -#define NV30_FP_REG_TYPE_SHIFT          0 -#define NV30_FP_REG_TYPE_MASK          (3 << 0) -#  define NV30_FP_REG_TYPE_TEMP  0 -#  define NV30_FP_REG_TYPE_INPUT  1 -#  define NV30_FP_REG_TYPE_CONST  2 -#define NV30_FP_REG_SRC_SHIFT          2 /* uncertain */ -#define NV30_FP_REG_SRC_MASK          (31 << 2) -#define NV30_FP_REG_SRC_HALF          (1 << 8) -#define NV30_FP_REG_SWZ_ALL_SHIFT        9 -#define NV30_FP_REG_SWZ_ALL_MASK        (255 << 9) -#define NV30_FP_REG_SWZ_X_SHIFT          9 -#define 
NV30_FP_REG_SWZ_X_MASK          (3 << 9) -#define NV30_FP_REG_SWZ_Y_SHIFT          11 -#define NV30_FP_REG_SWZ_Y_MASK          (3 << 11) -#define NV30_FP_REG_SWZ_Z_SHIFT          13 -#define NV30_FP_REG_SWZ_Z_MASK          (3 << 13) -#define NV30_FP_REG_SWZ_W_SHIFT          15 -#define NV30_FP_REG_SWZ_W_MASK          (3 << 15) -#  define NV30_FP_SWIZZLE_X  0 -#  define NV30_FP_SWIZZLE_Y  1 -#  define NV30_FP_SWIZZLE_Z  2 -#  define NV30_FP_SWIZZLE_W  3 -#define NV30_FP_REG_NEGATE          (1 << 17) - -#define NV30SR_NONE	0 -#define NV30SR_OUTPUT	1 -#define NV30SR_INPUT	2 -#define NV30SR_TEMP	3 -#define NV30SR_CONST	4 - -struct nv30_sreg { -	int type; -	int index; - -	int dst_scale; - -	int negate; -	int abs; -	int swz[4]; - -	int cc_update; -	int cc_update_reg; -	int cc_test; -	int cc_test_reg; -	int cc_swz[4]; -}; - -static INLINE struct nv30_sreg -nv30_sr(int type, int index) -{ -	struct nv30_sreg temp = { -		.type = type, -		.index = index, -		.dst_scale = DEF_SCALE, -		.abs = 0, -		.negate = 0, -		.swz = { 0, 1, 2, 3 }, -		.cc_update = 0, -		.cc_update_reg = 0, -		.cc_test = DEF_CTEST, -		.cc_test_reg = 0, -		.cc_swz = { 0, 1, 2, 3 }, -	}; -	return temp; -} - -static INLINE struct nv30_sreg -nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) -{ -	struct nv30_sreg dst = src; - -	dst.swz[SWZ_X] = src.swz[x]; -	dst.swz[SWZ_Y] = src.swz[y]; -	dst.swz[SWZ_Z] = src.swz[z]; -	dst.swz[SWZ_W] = src.swz[w]; -	return dst; -} - -static INLINE struct nv30_sreg -nv30_sr_neg(struct nv30_sreg src) -{ -	src.negate = !src.negate; -	return src; -} - -static INLINE struct nv30_sreg -nv30_sr_abs(struct nv30_sreg src) -{ -	src.abs = 1; -	return src; -} - -static INLINE struct nv30_sreg -nv30_sr_scale(struct nv30_sreg src, int scale) -{ -	src.dst_scale = scale; -	return src; -} +#include "nvfx_shader.h"  #endif diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index cf910e34b1..ec6d63889b 100644 --- 
a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -35,9 +35,9 @@  #define DEF_CTEST 0  #include "nv30_shader.h" -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s))  struct nv30_vpc {  	struct nvfx_vertex_program *vp; @@ -49,21 +49,21 @@ struct nv30_vpc {  	int high_temp;  	int temp_temp_count; -	struct nv30_sreg *imm; +	struct nvfx_sreg *imm;  	unsigned nr_imm;  }; -static struct nv30_sreg +static struct nvfx_sreg  temp(struct nv30_vpc *vpc)  {  	int idx;  	idx  = vpc->temp_temp_count++;  	idx += vpc->high_temp + 1; -	return nv30_sr(NV30SR_TEMP, idx); +	return nvfx_sr(NVFXSR_TEMP, idx);  } -static struct nv30_sreg +static struct nvfx_sreg  constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)  {  	struct nvfx_vertex_program *vp = vpc->vp; @@ -73,7 +73,7 @@ constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)  	if (pipe >= 0) {  		for (idx = 0; idx < vp->nr_consts; idx++) {  			if (vp->consts[idx].index == pipe) -				return nv30_sr(NV30SR_CONST, idx); +				return nvfx_sr(NVFXSR_CONST, idx);  		}  	} @@ -86,37 +86,37 @@ constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w)  	vpd->value[1] = y;  	vpd->value[2] = z;  	vpd->value[3] = w; -	return nv30_sr(NV30SR_CONST, idx); +	return nvfx_sr(NVFXSR_CONST, idx);  }  #define arith(cc,s,o,d,m,s0,s1,s2) \ -	nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) +	nv30_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))  static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) +emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)  {  	struct nvfx_vertex_program *vp = vpc->vp; 
 	uint32_t sr = 0;  	switch (src.type) { -	case NV30SR_TEMP: +	case NVFXSR_TEMP:  		sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT);  		sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT);  		break; -	case NV30SR_INPUT: +	case NVFXSR_INPUT:  		sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<  		       NV30_VP_SRC_REG_TYPE_SHIFT);  		vp->ir |= (1 << src.index);  		hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT);  		break; -	case NV30SR_CONST: +	case NVFXSR_CONST:  		sr |= (NV30_VP_SRC_REG_TYPE_CONST <<  		       NV30_VP_SRC_REG_TYPE_SHIFT);  		assert(vpc->vpi->const_index == -1 ||  		       vpc->vpi->const_index == src.index);  		vpc->vpi->const_index = src.index;  		break; -	case NV30SR_NONE: +	case NVFXSR_NONE:  		sr |= (NV30_VP_SRC_REG_TYPE_INPUT <<  		       NV30_VP_SRC_REG_TYPE_SHIFT);  		break; @@ -164,15 +164,15 @@ emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src)  }  static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) +emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)  {  	struct nvfx_vertex_program *vp = vpc->vp;  	switch (dst.type) { -	case NV30SR_TEMP: +	case NVFXSR_TEMP:  		hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);  		break; -	case NV30SR_OUTPUT: +	case NVFXSR_OUTPUT:  		switch (dst.index) {  		case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;  		case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; @@ -207,9 +207,9 @@ emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst)  static void  nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, -	      struct nv30_sreg dst, int mask, -	      struct nv30_sreg s0, struct nv30_sreg s1, -	      struct nv30_sreg s2) +	      struct nvfx_sreg dst, int mask, +	      struct nvfx_sreg s0, struct nvfx_sreg s1, +	      struct nvfx_sreg s2)  {  	struct nvfx_vertex_program *vp = vpc->vp;  	uint32_t *hw; @@ -221,7 +221,7 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,  	hw = 
vpc->vpi->data; -	hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); +	hw[0] |= (NVFX_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT);  	hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) |  		  (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) |  		  (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | @@ -231,7 +231,7 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,  //	hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK;  //	hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); -	if (dst.type == NV30SR_OUTPUT) { +	if (dst.type == NVFXSR_OUTPUT) {  		if (slot)  			hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);  		else @@ -249,13 +249,13 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op,  	emit_src(vpc, hw, 2, s2);  } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg  tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { -	struct nv30_sreg src; +	struct nvfx_sreg src;  	switch (fsrc->Register.File) {  	case TGSI_FILE_INPUT: -		src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); +		src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);  		break;  	case TGSI_FILE_CONSTANT:  		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); @@ -266,7 +266,7 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {  	case TGSI_FILE_TEMPORARY:  		if (vpc->high_temp < fsrc->Register.Index)  			vpc->high_temp = fsrc->Register.Index; -		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); +		src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index);  		break;  	default:  		NOUVEAU_ERR("bad src file\n"); @@ -282,18 +282,18 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {  	return src;  } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg  tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { -	struct nv30_sreg dst; +	struct nvfx_sreg dst;  	switch (fdst->Register.File) {  	case TGSI_FILE_OUTPUT: -		dst = nv30_sr(NV30SR_OUTPUT, +		dst = nvfx_sr(NVFXSR_OUTPUT,  			      
vpc->output_map[fdst->Register.Index]);  		break;  	case TGSI_FILE_TEMPORARY: -		dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); +		dst = nvfx_sr(NVFXSR_TEMP, fdst->Register.Index);  		if (vpc->high_temp < dst.index)  			vpc->high_temp = dst.index;  		break; @@ -321,8 +321,8 @@ static boolean  nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,  				const struct tgsi_full_instruction *finst)  { -	struct nv30_sreg src[3], dst, tmp; -	struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); +	struct nvfx_sreg src[3], dst, tmp; +	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);  	int mask;  	int ai = -1, ci = -1;  	int i; @@ -351,7 +351,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,  				src[i] = tgsi_src(vpc, fsrc);  			} else {  				src[i] = temp(vpc); -				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				arith(vpc, VEC, MOV, src[i], MASK_ALL,  				      tgsi_src(vpc, fsrc), none, none);  			}  			break; @@ -365,7 +365,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,  				src[i] = tgsi_src(vpc, fsrc);  			} else {  				src[i] = temp(vpc); -				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				arith(vpc, VEC, MOV, src[i], MASK_ALL,  				      tgsi_src(vpc, fsrc), none, none);  			}  			break; @@ -383,96 +383,96 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,  	switch (finst->Instruction.Opcode) {  	case TGSI_OPCODE_ABS: -		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); +		arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);  		break;  	case TGSI_OPCODE_ADD: -		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); +		arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);  		break;  	case TGSI_OPCODE_ARL: -		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); +		arith(vpc, VEC, ARL, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_DP3: -		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_DP4: -		arith(vpc, 0, OP_DP4, dst, mask, 
src[0], src[1], none); +		arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_DPH: -		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_DST: -		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_EX2: -		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); +		arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_EXP: -		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); +		arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_FLR: -		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); +		arith(vpc, VEC, FLR, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_FRC: -		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); +		arith(vpc, VEC, FRC, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_LG2: -		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); +		arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_LIT: -		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); +		arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_LOG: -		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); +		arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_MAD: -		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); +		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);  		break;  	case TGSI_OPCODE_MAX: -		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_MIN: -		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_MOV: -		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); +		arith(vpc, VEC, MOV, dst, mask, 
src[0], none, none);  		break;  	case TGSI_OPCODE_MUL: -		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_POW:  		tmp = temp(vpc); -		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, +		arith(vpc, SCA, LG2, tmp, MASK_X, none, none,  		      swz(src[0], X, X, X, X)); -		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),  		      swz(src[1], X, X, X, X), none); -		arith(vpc, 1, OP_EX2, dst, mask, none, none, +		arith(vpc, SCA, EX2, dst, mask, none, none,  		      swz(tmp, X, X, X, X));  		break;  	case TGSI_OPCODE_RCP: -		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); +		arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_RET:  		break;  	case TGSI_OPCODE_RSQ: -		arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); +		arith(vpc, SCA, RSQ, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_SGE: -		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_SGT: -		arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_SLT: -		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_SUB: -		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); +		arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));  		break;  	case TGSI_OPCODE_XPD:  		tmp = temp(vpc); -		arith(vpc, 0, OP_MUL, tmp, mask, +		arith(vpc, VEC, MUL, tmp, mask,  		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); -		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), +		arith(vpc, VEC, MAD, dst, (mask & ~MASK_W),  		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),  		      neg(tmp));  		break; @@ -564,7 +564,7 @@ 
nv30_vertprog_prepare(struct nv30_vpc *vpc)  	tgsi_parse_free(&p);  	if (nr_imm) { -		vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); +		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));  		assert(vpc->imm);  	} @@ -639,7 +639,7 @@ nv30_vertprog_translate(struct nvfx_context *nvfx,  		}  	} -	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; +	vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;  	vp->translated = TRUE;  out_err:  	tgsi_parse_free(&parse); diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 87d2689d54..4ed87779fd 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -8,7 +8,7 @@  #include "draw/draw_pipe.h"  #include "nv40_context.h" -#define NV40_SHADER_NO_FUCKEDNESS +#define NVFX_SHADER_NO_FUCKEDNESS  #include "nv40_shader.h"  /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 2a0ab0cf31..e044f367a0 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -18,14 +18,14 @@  #define MASK_Z 4  #define MASK_W 8  #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV40_FP_OP_COND_TR -#include "nv40_shader.h" +#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X +#define DEF_CTEST NVFX_FP_OP_COND_TR +#include "nvfx_shader.h" -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) -#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v)  #define MAX_CONSTS 128  #define MAX_IMM 32 @@ -36,8 +36,8 @@ struct nv40_fpc {  	unsigned r_temps;  	
unsigned r_temps_discard; -	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; -	struct nv40_sreg *r_temp; +	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; +	struct nvfx_sreg *r_temp;  	int num_regs; @@ -50,11 +50,11 @@ struct nv40_fpc {  	} consts[MAX_CONSTS];  	int nr_consts; -	struct nv40_sreg imm[MAX_IMM]; +	struct nvfx_sreg imm[MAX_IMM];  	unsigned nr_imm;  }; -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg  temp(struct nv40_fpc *fpc)  {  	int idx = ffs(~fpc->r_temps) - 1; @@ -62,12 +62,12 @@ temp(struct nv40_fpc *fpc)  	if (idx < 0) {  		NOUVEAU_ERR("out of temps!!\n");  		assert(0); -		return nv40_sr(NV40SR_TEMP, 0); +		return nvfx_sr(NVFXSR_TEMP, 0);  	}  	fpc->r_temps |= (1 << idx);  	fpc->r_temps_discard |= (1 << idx); -	return nv40_sr(NV40SR_TEMP, idx); +	return nvfx_sr(NVFXSR_TEMP, idx);  }  static INLINE void @@ -77,7 +77,7 @@ release_temps(struct nv40_fpc *fpc)  	fpc->r_temps_discard = 0;  } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg  constant(struct nv40_fpc *fpc, int pipe, float vals[4])  {  	int idx; @@ -89,14 +89,14 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4])  	fpc->consts[idx].pipe = pipe;  	if (pipe == -1)  		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); -	return nv40_sr(NV40SR_CONST, idx); +	return nvfx_sr(NVFXSR_CONST, idx);  }  #define arith(cc,s,o,d,m,s0,s1,s2) \ -	nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ +	nv40_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \  			(d), (m), (s0), (s1), (s2))  #define tex(cc,s,o,u,d,m,s0,s1,s2) \ -	nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ +	nv40_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \  		    (d), (m), (s0), none, none)  static void @@ -109,25 +109,25 @@ grow_insns(struct nv40_fpc *fpc, int size)  }  static void -emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +emit_src(struct nv40_fpc *fpc, int pos, struct nvfx_sreg src)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	uint32_t *hw = 
&fp->insn[fpc->inst_offset];  	uint32_t sr = 0;  	switch (src.type) { -	case NV40SR_INPUT: -		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); -		hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); +	case NVFXSR_INPUT: +		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); +		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);  		break; -	case NV40SR_OUTPUT: -		sr |= NV40_FP_REG_SRC_HALF; +	case NVFXSR_OUTPUT: +		sr |= NVFX_FP_REG_SRC_HALF;  		/* fall-through */ -	case NV40SR_TEMP: -		sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); -		sr |= (src.index << NV40_FP_REG_SRC_SHIFT); +	case NVFXSR_TEMP: +		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); +		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);  		break; -	case NV40SR_CONST: +	case NVFXSR_CONST:  		if (!fpc->have_const) {  			grow_insns(fpc, 4);  			fpc->have_const = 1; @@ -149,61 +149,61 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)  				sizeof(uint32_t) * 4);  		} -		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); +		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);  		break; -	case NV40SR_NONE: -		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); +	case NVFXSR_NONE: +		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);  		break;  	default:  		assert(0);  	}  	if (src.negate) -		sr |= NV40_FP_REG_NEGATE; +		sr |= NVFX_FP_REG_NEGATE;  	if (src.abs)  		hw[1] |= (1 << (29 + pos)); -	sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | -	       (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | -	       (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | -	       (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); +	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | +	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | +	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | +	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));  	hw[pos + 1] |= sr;  }  static void -emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +emit_dst(struct nv40_fpc *fpc, struct nvfx_sreg dst)  {  	
struct nvfx_fragment_program *fp = fpc->fp;  	uint32_t *hw = &fp->insn[fpc->inst_offset];  	switch (dst.type) { -	case NV40SR_TEMP: +	case NVFXSR_TEMP:  		if (fpc->num_regs < (dst.index + 1))  			fpc->num_regs = dst.index + 1;  		break; -	case NV40SR_OUTPUT: +	case NVFXSR_OUTPUT:  		if (dst.index == 1) {  			fp->fp_control |= 0xe;  		} else { -			hw[0] |= NV40_FP_OP_OUT_REG_HALF; +			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;  		}  		break; -	case NV40SR_NONE: +	case NVFXSR_NONE:  		hw[0] |= (1 << 30);  		break;  	default:  		assert(0);  	} -	hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);  }  static void  nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, -	      struct nv40_sreg dst, int mask, -	      struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +	      struct nvfx_sreg dst, int mask, +	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	uint32_t *hw; @@ -214,22 +214,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,  	hw = &fp->insn[fpc->inst_offset];  	memset(hw, 0, sizeof(uint32_t) * 4); -	if (op == NV40_FP_OP_OPCODE_KIL) +	if (op == NVFX_FP_OP_OPCODE_KIL)  		fp->fp_control |= NV40TCL_FP_CONTROL_KIL; -	hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); -	hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); -	hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); +	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); +	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); +	hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);  	if (sat) -		hw[0] |= NV40_FP_OP_OUT_SAT; +		hw[0] |= NVFX_FP_OP_OUT_SAT;  	if (dst.cc_update) -		hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; -	hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); -	hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | -		  (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | -		  (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | -		  (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); +		hw[0] |= 
NVFX_FP_OP_COND_WRITE_ENABLE; +	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); +	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | +		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | +		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | +		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));  	emit_dst(fpc, dst);  	emit_src(fpc, 0, s0); @@ -239,25 +239,25 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,  static void  nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, -	    struct nv40_sreg dst, int mask, -	    struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +	    struct nvfx_sreg dst, int mask, +	    struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)  {  	struct nvfx_fragment_program *fp = fpc->fp;  	nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); -	fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); +	fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);  	fp->samplers |= (1 << unit);  } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg  tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)  { -	struct nv40_sreg src; +	struct nvfx_sreg src;  	switch (fsrc->Register.File) {  	case TGSI_FILE_INPUT: -		src = nv40_sr(NV40SR_INPUT, +		src = nvfx_sr(NVFXSR_INPUT,  			      fpc->attrib_map[fsrc->Register.Index]);  		break;  	case TGSI_FILE_CONSTANT: @@ -288,7 +288,7 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)  	return src;  } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg  tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {  	switch (fdst->Register.File) {  	case TGSI_FILE_OUTPUT: @@ -296,10 +296,10 @@ tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {  	case TGSI_FILE_TEMPORARY:  		return fpc->r_temp[fdst->Register.Index];  	case TGSI_FILE_NULL: -		return nv40_sr(NV40SR_NONE, 0); +		return nvfx_sr(NVFXSR_NONE, 0);  	default:  		NOUVEAU_ERR("bad dst file 
%d\n", fdst->Register.File); -		return nv40_sr(NV40SR_NONE, 0); +		return nvfx_sr(NVFXSR_NONE, 0);  	}  } @@ -317,10 +317,10 @@ tgsi_mask(uint tgsi)  static boolean  src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, -	       struct nv40_sreg *src) +	       struct nvfx_sreg *src)  { -	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); -	struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); +	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); +	struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc);  	uint mask = 0;  	uint c; @@ -352,8 +352,8 @@ static boolean  nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,  				const struct tgsi_full_instruction *finst)  { -	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); -	struct nv40_sreg src[3], dst, tmp; +	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); +	struct nvfx_sreg src[3], dst, tmp;  	int mask, sat, unit;  	int ai = -1, ci = -1, ii = -1;  	int i; @@ -445,12 +445,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,  		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_CMP: -		tmp = nv40_sr(NV40SR_NONE, 0); +		tmp = nvfx_sr(NVFXSR_NONE, 0);  		tmp.cc_update = 1;  		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); -		dst.cc_test = NV40_VP_INST_COND_GE; +		dst.cc_test = NVFX_VP_INST_COND_GE;  		arith(fpc, sat, MOV, dst, mask, src[2], none, none); -		dst.cc_test = NV40_VP_INST_COND_LT; +		dst.cc_test = NVFX_VP_INST_COND_LT;  		arith(fpc, sat, MOV, dst, mask, src[1], none, none);  		break;  	case TGSI_OPCODE_COS: @@ -512,10 +512,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,  		arith(fpc, 0, KIL, none, 0, none, none, none);  		break;  	case TGSI_OPCODE_KIL: -		dst = nv40_sr(NV40SR_NONE, 0); +		dst = nvfx_sr(NVFXSR_NONE, 0);  		dst.cc_update = 1;  		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); -		dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; +		dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT;  		arith(fpc, 0, KIL, 
dst, 0, none, none, none);  		break;  	case TGSI_OPCODE_LG2: @@ -662,25 +662,25 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,  	switch (fdec->Semantic.Name) {  	case TGSI_SEMANTIC_POSITION: -		hw = NV40_FP_OP_INPUT_SRC_POSITION; +		hw = NVFX_FP_OP_INPUT_SRC_POSITION;  		break;  	case TGSI_SEMANTIC_COLOR:  		if (fdec->Semantic.Index == 0) { -			hw = NV40_FP_OP_INPUT_SRC_COL0; +			hw = NVFX_FP_OP_INPUT_SRC_COL0;  		} else  		if (fdec->Semantic.Index == 1) { -			hw = NV40_FP_OP_INPUT_SRC_COL1; +			hw = NVFX_FP_OP_INPUT_SRC_COL1;  		} else {  			NOUVEAU_ERR("bad colour semantic index\n");  			return FALSE;  		}  		break;  	case TGSI_SEMANTIC_FOG: -		hw = NV40_FP_OP_INPUT_SRC_FOGC; +		hw = NVFX_FP_OP_INPUT_SRC_FOGC;  		break;  	case TGSI_SEMANTIC_GENERIC:  		if (fdec->Semantic.Index <= 7) { -			hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. +			hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.  						     Index);  		} else {  			NOUVEAU_ERR("bad generic semantic index\n"); @@ -723,7 +723,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,  		return FALSE;  	} -	fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); +	fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);  	fpc->r_temps |= (1 << hw);  	return TRUE;  } @@ -787,7 +787,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)  	tgsi_parse_free(&p);  	if (++high_temp) { -		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); +		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));  		for (i = 0; i < high_temp; i++)  			fpc->r_temp[i] = temp(fpc);  		fpc->r_temps_discard = 0; diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h index 854dccf548..8d28137e9d 100644 --- a/src/gallium/drivers/nv40/nv40_shader.h +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -48,14 +48,6 @@  #define NV40_VP_INST_COND_TEST_ENABLE                                  (1 << 13)  #define NV40_VP_INST_COND_SHIFT                                               10  #define NV40_VP_INST_COND_MASK        
                               (0x7 << 10) -#    define NV40_VP_INST_COND_FL                                               0 -#    define NV40_VP_INST_COND_LT                                               1 -#    define NV40_VP_INST_COND_EQ                                               2 -#    define NV40_VP_INST_COND_LE                                               3 -#    define NV40_VP_INST_COND_GT                                               4 -#    define NV40_VP_INST_COND_NE                                               5 -#    define NV40_VP_INST_COND_GE                                               6 -#    define NV40_VP_INST_COND_TR                                               7  #define NV40_VP_INST_COND_SWZ_X_SHIFT                                          8  #define NV40_VP_INST_COND_SWZ_X_MASK                                    (3 << 8)  #define NV40_VP_INST_COND_SWZ_Y_SHIFT                                          6 @@ -84,63 +76,12 @@  /* ---- OPCODE BITS 95:64 / data DWORD 1 --- */  #define NV40_VP_INST_VEC_OPCODE_SHIFT                                         22  #define NV40_VP_INST_VEC_OPCODE_MASK                                (0x1F << 22) -#    define NV40_VP_INST_OP_NOP                                             0x00 -#    define NV40_VP_INST_OP_MOV                                             0x01 -#    define NV40_VP_INST_OP_MUL                                             0x02 -#    define NV40_VP_INST_OP_ADD                                             0x03 -#    define NV40_VP_INST_OP_MAD                                             0x04 -#    define NV40_VP_INST_OP_DP3                                             0x05 -#    define NV40_VP_INST_OP_DPH                                             0x06 -#    define NV40_VP_INST_OP_DP4                                             0x07 -#    define NV40_VP_INST_OP_DST                                             0x08 -#    define NV40_VP_INST_OP_MIN                                             0x09 
-#    define NV40_VP_INST_OP_MAX                                             0x0A -#    define NV40_VP_INST_OP_SLT                                             0x0B -#    define NV40_VP_INST_OP_SGE                                             0x0C -#    define NV40_VP_INST_OP_ARL                                             0x0D -#    define NV40_VP_INST_OP_FRC                                             0x0E -#    define NV40_VP_INST_OP_FLR                                             0x0F -#    define NV40_VP_INST_OP_SEQ                                             0x10 -#    define NV40_VP_INST_OP_SFL                                             0x11 -#    define NV40_VP_INST_OP_SGT                                             0x12 -#    define NV40_VP_INST_OP_SLE                                             0x13 -#    define NV40_VP_INST_OP_SNE                                             0x14 -#    define NV40_VP_INST_OP_STR                                             0x15 -#    define NV40_VP_INST_OP_SSG                                             0x16 -#    define NV40_VP_INST_OP_ARR                                             0x17 -#    define NV40_VP_INST_OP_ARA                                             0x18 -#    define NV40_VP_INST_OP_TXL                                             0x19  #define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27  #define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27) -#    define NV40_VP_INST_OP_NOP                                             0x00 -#    define NV40_VP_INST_OP_MOV                                             0x01 -#    define NV40_VP_INST_OP_RCP                                             0x02 -#    define NV40_VP_INST_OP_RCC                                             0x03 -#    define NV40_VP_INST_OP_RSQ                                             0x04 -#    define NV40_VP_INST_OP_EXP                                             0x05 -#    define 
NV40_VP_INST_OP_LOG                                             0x06 -#    define NV40_VP_INST_OP_LIT                                             0x07 -#    define NV40_VP_INST_OP_BRA                                             0x09 -#    define NV40_VP_INST_OP_CAL                                             0x0B -#    define NV40_VP_INST_OP_RET                                             0x0C -#    define NV40_VP_INST_OP_LG2                                             0x0D -#    define NV40_VP_INST_OP_EX2                                             0x0E -#    define NV40_VP_INST_OP_SIN                                             0x0F -#    define NV40_VP_INST_OP_COS                                             0x10 -#    define NV40_VP_INST_OP_PUSHA                                           0x13 -#    define NV40_VP_INST_OP_POPA                                            0x14  #define NV40_VP_INST_CONST_SRC_SHIFT                                          12  #define NV40_VP_INST_CONST_SRC_MASK                                 (0xFF << 12)  #define NV40_VP_INST_INPUT_SRC_SHIFT                                           8  #define NV40_VP_INST_INPUT_SRC_MASK                                  (0x0F << 8) -#    define NV40_VP_INST_IN_POS                                                0 -#    define NV40_VP_INST_IN_WEIGHT                                             1 -#    define NV40_VP_INST_IN_NORMAL                                             2 -#    define NV40_VP_INST_IN_COL0                                               3 -#    define NV40_VP_INST_IN_COL1                                               4 -#    define NV40_VP_INST_IN_FOGC                                               5 -#    define NV40_VP_INST_IN_TC0                                                8 -#    define NV40_VP_INST_IN_TC(n)                                          (8+n)  #define NV40_VP_INST_SRC0H_SHIFT                                               0  #define NV40_VP_INST_SRC0H_MASK             
                         (0xFF << 0)  #define NV40_VP_INST1_KNOWN ( \ @@ -194,7 +135,6 @@  #    define NV40_VP_INST_DEST_TC(n)                                        (7+n)  #    define NV40_VP_INST_DEST_TEMP                                          0x1F  #define NV40_VP_INST_INDEX_CONST                                        (1 << 1) -#define NV40_VP_INST_LAST                                               (1 << 0)  #define NV40_VP_INST3_KNOWN ( \                  NV40_VP_INST_SRC2L_MASK |\                  NV40_VP_INST_SCA_WRITEMASK_MASK |\ @@ -232,325 +172,7 @@  #    define NV40_VP_SRC_REG_TYPE_INPUT                                         2  #    define NV40_VP_SRC_REG_TYPE_CONST                                         3 +#include "nvfx_shader.h" -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - *         0 - Opcode, output reg/mask, ATTRIB source - *         1 - Source 0 - *         2 - Source 1 - *         3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - *                 result.color == R0.xyzw - *                 result.depth == R1.z - * When the fragprog contains instructions to write depth, - * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - *  - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords.  As such instructions such as: - *  - *                 ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. 
- * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and - * SWIZZLE_ONE. - * - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as - * SWIZZLE_ZERO is implemented simply by not writing to the relevant components - * of the destination. - * - * Looping - *   Loops appear to be fairly expensive on NV40 at least, the proprietary - *   driver goes to a lot of effort to avoid using the native looping - *   instructions.  If the total number of *executed* instructions between - *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. - *   The maximum loop count is 255. - * - * Conditional execution - *   TODO - *  - * Non-native instructions: - *         LIT - *         LRP - MAD+MAD - *         SUB - ADD, negate second source - *         RSQ - LG2 + EX2 - *         POW - LG2 + MUL + EX2 - *         SCS - COS + SIN - *         XPD - *         DP2 - MUL + ADD - *         NRM - */ - -//== Opcode / Destination selection == -#define NV40_FP_OP_PROGRAM_END                                          (1 << 0) -#define NV40_FP_OP_OUT_REG_SHIFT                                               1 -#define NV40_FP_OP_OUT_REG_MASK                                        (63 << 1) -/* Needs to be set when writing outputs to get expected result.. 
*/ -#define NV40_FP_OP_OUT_REG_HALF                                         (1 << 7) -#define NV40_FP_OP_COND_WRITE_ENABLE                                    (1 << 8) -#define NV40_FP_OP_OUTMASK_SHIFT                                               9 -#define NV40_FP_OP_OUTMASK_MASK                                       (0xF << 9) -#    define NV40_FP_OP_OUT_X                                            (1 << 9) -#    define NV40_FP_OP_OUT_Y                                            (1 <<10) -#    define NV40_FP_OP_OUT_Z                                            (1 <<11) -#    define NV40_FP_OP_OUT_W                                            (1 <<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV40_FP_OP_INPUT_SRC_SHIFT                                            13 -#define NV40_FP_OP_INPUT_SRC_MASK                                     (15 << 13) -#    define NV40_FP_OP_INPUT_SRC_POSITION                                    0x0 -#    define NV40_FP_OP_INPUT_SRC_COL0                                        0x1 -#    define NV40_FP_OP_INPUT_SRC_COL1                                        0x2 -#    define NV40_FP_OP_INPUT_SRC_FOGC                                        0x3 -#    define NV40_FP_OP_INPUT_SRC_TC0                                         0x4 -#    define NV40_FP_OP_INPUT_SRC_TC(n)                                 (0x4 + n) -#    define NV40_FP_OP_INPUT_SRC_FACING                                      0xE -#define NV40_FP_OP_TEX_UNIT_SHIFT                                             17 -#define NV40_FP_OP_TEX_UNIT_MASK                                     (0xF << 17) -#define NV40_FP_OP_PRECISION_SHIFT                                            22 -#define NV40_FP_OP_PRECISION_MASK                                      (3 << 22) -#   define NV40_FP_PRECISION_FP32                                              0 -#   define NV40_FP_PRECISION_FP16                                     
         1 -#   define NV40_FP_PRECISION_FX12                                              2 -#define NV40_FP_OP_OPCODE_SHIFT                                               24 -#define NV40_FP_OP_OPCODE_MASK                                      (0x3F << 24) -#        define NV40_FP_OP_OPCODE_NOP                                       0x00 -#        define NV40_FP_OP_OPCODE_MOV                                       0x01 -#        define NV40_FP_OP_OPCODE_MUL                                       0x02 -#        define NV40_FP_OP_OPCODE_ADD                                       0x03 -#        define NV40_FP_OP_OPCODE_MAD                                       0x04 -#        define NV40_FP_OP_OPCODE_DP3                                       0x05 -#        define NV40_FP_OP_OPCODE_DP4                                       0x06 -#        define NV40_FP_OP_OPCODE_DST                                       0x07 -#        define NV40_FP_OP_OPCODE_MIN                                       0x08 -#        define NV40_FP_OP_OPCODE_MAX                                       0x09 -#        define NV40_FP_OP_OPCODE_SLT                                       0x0A -#        define NV40_FP_OP_OPCODE_SGE                                       0x0B -#        define NV40_FP_OP_OPCODE_SLE                                       0x0C -#        define NV40_FP_OP_OPCODE_SGT                                       0x0D -#        define NV40_FP_OP_OPCODE_SNE                                       0x0E -#        define NV40_FP_OP_OPCODE_SEQ                                       0x0F -#        define NV40_FP_OP_OPCODE_FRC                                       0x10 -#        define NV40_FP_OP_OPCODE_FLR                                       0x11 -#        define NV40_FP_OP_OPCODE_KIL                                       0x12 -#        define NV40_FP_OP_OPCODE_PK4B                                      0x13 -#        define NV40_FP_OP_OPCODE_UP4B                                      0x14 -/* DDX/DDY can only 
write to XY */ -#        define NV40_FP_OP_OPCODE_DDX                                       0x15 -#        define NV40_FP_OP_OPCODE_DDY                                       0x16 -#        define NV40_FP_OP_OPCODE_TEX                                       0x17 -#        define NV40_FP_OP_OPCODE_TXP                                       0x18 -#        define NV40_FP_OP_OPCODE_TXD                                       0x19 -#        define NV40_FP_OP_OPCODE_RCP                                       0x1A -#        define NV40_FP_OP_OPCODE_EX2                                       0x1C -#        define NV40_FP_OP_OPCODE_LG2                                       0x1D -#        define NV40_FP_OP_OPCODE_STR                                       0x20 -#        define NV40_FP_OP_OPCODE_SFL                                       0x21 -#        define NV40_FP_OP_OPCODE_COS                                       0x22 -#        define NV40_FP_OP_OPCODE_SIN                                       0x23 -#        define NV40_FP_OP_OPCODE_PK2H                                      0x24 -#        define NV40_FP_OP_OPCODE_UP2H                                      0x25 -#        define NV40_FP_OP_OPCODE_PK4UB                                     0x27 -#        define NV40_FP_OP_OPCODE_UP4UB                                     0x28 -#        define NV40_FP_OP_OPCODE_PK2US                                     0x29 -#        define NV40_FP_OP_OPCODE_UP2US                                     0x2A -#        define NV40_FP_OP_OPCODE_DP2A                                      0x2E -#        define NV40_FP_OP_OPCODE_TXL                                       0x2F -#        define NV40_FP_OP_OPCODE_TXB                                       0x31 -#        define NV40_FP_OP_OPCODE_DIV                                       0x3A -#        define NV40_FP_OP_OPCODE_UNK_LIT                                   0x3C -/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ -#        define 
NV40_FP_OP_BRA_OPCODE_BRK                                    0x0 -#        define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1 -#        define NV40_FP_OP_BRA_OPCODE_IF                                     0x2 -#        define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3 -#        define NV40_FP_OP_BRA_OPCODE_REP                                    0x4 -#        define NV40_FP_OP_BRA_OPCODE_RET                                    0x5 -#define NV40_FP_OP_OUT_SAT                                             (1 << 31) - -/* high order bits of SRC0 */ -#define NV40_FP_OP_OUT_ABS                                             (1 << 29) -#define NV40_FP_OP_COND_SWZ_W_SHIFT                                           27 -#define NV40_FP_OP_COND_SWZ_W_MASK                                     (3 << 27) -#define NV40_FP_OP_COND_SWZ_Z_SHIFT                                           25 -#define NV40_FP_OP_COND_SWZ_Z_MASK                                     (3 << 25) -#define NV40_FP_OP_COND_SWZ_Y_SHIFT                                           23 -#define NV40_FP_OP_COND_SWZ_Y_MASK                                     (3 << 23) -#define NV40_FP_OP_COND_SWZ_X_SHIFT                                           21 -#define NV40_FP_OP_COND_SWZ_X_MASK                                     (3 << 21) -#define NV40_FP_OP_COND_SWZ_ALL_SHIFT                                         21 -#define NV40_FP_OP_COND_SWZ_ALL_MASK                                (0xFF << 21) -#define NV40_FP_OP_COND_SHIFT                                                 18 -#define NV40_FP_OP_COND_MASK                                        (0x07 << 18) -#        define NV40_FP_OP_COND_FL                                             0 -#        define NV40_FP_OP_COND_LT                                             1 -#        define NV40_FP_OP_COND_EQ                                             2 -#        define NV40_FP_OP_COND_LE                                             3 -#        
define NV40_FP_OP_COND_GT                                             4 -#        define NV40_FP_OP_COND_NE                                             5 -#        define NV40_FP_OP_COND_GE                                             6 -#        define NV40_FP_OP_COND_TR                                             7 - -/* high order bits of SRC1 */ -#define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31) -#define NV40_FP_OP_DST_SCALE_SHIFT                                            28 -#define NV40_FP_OP_DST_SCALE_MASK                                      (3 << 28) -#define NV40_FP_OP_DST_SCALE_1X                                                0 -#define NV40_FP_OP_DST_SCALE_2X                                                1 -#define NV40_FP_OP_DST_SCALE_4X                                                2 -#define NV40_FP_OP_DST_SCALE_8X                                                3 -#define NV40_FP_OP_DST_SCALE_INV_2X                                            5 -#define NV40_FP_OP_DST_SCALE_INV_4X                                            6 -#define NV40_FP_OP_DST_SCALE_INV_8X                                            7 - -/* SRC1 LOOP */ -#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19 -#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19) -#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10 -#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10) -#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2 -#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2) - -/* SRC1 IF */ -#define NV40_FP_OP_ELSE_ID_SHIFT                                               2 -#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2) - -/* SRC1 CAL */ -#define NV40_FP_OP_IADDR_SHIFT                                                 2 -#define NV40_FP_OP_IADDR_MASK       
                                 (0xFF << 2) - -/* SRC1 REP - *   I have no idea why there are 3 count values here..  but they - *   have always been filled with the same value in my tests so - *   far.. - */ -#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2 -#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2) -#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10 -#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10) -#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19 -#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19) - -/* SRC2 REP/IF */ -#define NV40_FP_OP_END_ID_SHIFT                                                2 -#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2) - -// SRC2 high-order -#define NV40_FP_OP_INDEX_INPUT                                         (1 << 30) -#define NV40_FP_OP_ADDR_INDEX_SHIFT                                           19 -#define NV40_FP_OP_ADDR_INDEX_MASK                                   (0xF << 19) - -//== Register selection == -#define NV40_FP_REG_TYPE_SHIFT                                                 0 -#define NV40_FP_REG_TYPE_MASK                                           (3 << 0) -#        define NV40_FP_REG_TYPE_TEMP                                          0 -#        define NV40_FP_REG_TYPE_INPUT                                         1 -#        define NV40_FP_REG_TYPE_CONST                                         2 -#define NV40_FP_REG_SRC_SHIFT                                                  2 -#define NV40_FP_REG_SRC_MASK                                           (63 << 2) -#define NV40_FP_REG_SRC_HALF                                            (1 << 8) -#define NV40_FP_REG_SWZ_ALL_SHIFT                                              9 -#define NV40_FP_REG_SWZ_ALL_MASK                                      (255 
<< 9) -#define NV40_FP_REG_SWZ_X_SHIFT                                                9 -#define NV40_FP_REG_SWZ_X_MASK                                          (3 << 9) -#define NV40_FP_REG_SWZ_Y_SHIFT                                               11 -#define NV40_FP_REG_SWZ_Y_MASK                                         (3 << 11) -#define NV40_FP_REG_SWZ_Z_SHIFT                                               13 -#define NV40_FP_REG_SWZ_Z_MASK                                         (3 << 13) -#define NV40_FP_REG_SWZ_W_SHIFT                                               15 -#define NV40_FP_REG_SWZ_W_MASK                                         (3 << 15) -#        define NV40_FP_SWIZZLE_X                                              0 -#        define NV40_FP_SWIZZLE_Y                                              1 -#        define NV40_FP_SWIZZLE_Z                                              2 -#        define NV40_FP_SWIZZLE_W                                              3 -#define NV40_FP_REG_NEGATE                                             (1 << 17) - -#ifndef NV40_SHADER_NO_FUCKEDNESS -#define NV40SR_NONE	0 -#define NV40SR_OUTPUT	1 -#define NV40SR_INPUT	2 -#define NV40SR_TEMP	3 -#define NV40SR_CONST	4 - -struct nv40_sreg { -	int type; -	int index; - -	int dst_scale; - -	int negate; -	int abs; -	int swz[4]; - -	int cc_update; -	int cc_update_reg; -	int cc_test; -	int cc_test_reg; -	int cc_swz[4]; -}; - -static INLINE struct nv40_sreg -nv40_sr(int type, int index) -{ -	struct nv40_sreg temp = { -		.type = type, -		.index = index, -		.dst_scale = DEF_SCALE, -		.abs = 0, -		.negate = 0, -		.swz = { 0, 1, 2, 3 }, -		.cc_update = 0, -		.cc_update_reg = 0, -		.cc_test = DEF_CTEST, -		.cc_test_reg = 0, -		.cc_swz = { 0, 1, 2, 3 }, -	}; -	return temp; -} - -static INLINE struct nv40_sreg -nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) -{ -	struct nv40_sreg dst = src; - -	dst.swz[SWZ_X] = src.swz[x]; -	dst.swz[SWZ_Y] = src.swz[y]; -	dst.swz[SWZ_Z] = 
src.swz[z]; -	dst.swz[SWZ_W] = src.swz[w]; -	return dst; -} - -static INLINE struct nv40_sreg -nv40_sr_neg(struct nv40_sreg src) -{ -	src.negate = !src.negate; -	return src; -} - -static INLINE struct nv40_sreg -nv40_sr_abs(struct nv40_sreg src) -{ -	src.abs = 1; -	return src; -} - -static INLINE struct nv40_sreg -nv40_sr_scale(struct nv40_sreg src, int scale) -{ -	src.dst_scale = scale; -	return src; -}  #endif -#endif diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index a199f0766e..752cd0d1b3 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -34,9 +34,9 @@  #define DEF_CTEST 0  #include "nv40_shader.h" -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s))  #define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) @@ -47,17 +47,17 @@ struct nv40_vpc {  	unsigned r_temps;  	unsigned r_temps_discard; -	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; -	struct nv40_sreg *r_address; -	struct nv40_sreg *r_temp; +	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; +	struct nvfx_sreg *r_address; +	struct nvfx_sreg *r_temp; -	struct nv40_sreg *imm; +	struct nvfx_sreg *imm;  	unsigned nr_imm;  	unsigned hpos_idx;  }; -static struct nv40_sreg +static struct nvfx_sreg  temp(struct nv40_vpc *vpc)  {  	int idx = ffs(~vpc->r_temps) - 1; @@ -65,12 +65,12 @@ temp(struct nv40_vpc *vpc)  	if (idx < 0) {  		NOUVEAU_ERR("out of temps!!\n");  		assert(0); -		return nv40_sr(NV40SR_TEMP, 0); +		return nvfx_sr(NVFXSR_TEMP, 0);  	}  	vpc->r_temps |= (1 << idx);  	vpc->r_temps_discard |= (1 << idx); -	return nv40_sr(NV40SR_TEMP, idx); +	return nvfx_sr(NVFXSR_TEMP, idx);  }  static INLINE void @@ -80,7 +80,7 @@ release_temps(struct nv40_vpc *vpc)  	
vpc->r_temps_discard = 0;  } -static struct nv40_sreg +static struct nvfx_sreg  constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)  {  	struct nvfx_vertex_program *vp = vpc->vp; @@ -90,7 +90,7 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)  	if (pipe >= 0) {  		for (idx = 0; idx < vp->nr_consts; idx++) {  			if (vp->consts[idx].index == pipe) -				return nv40_sr(NV40SR_CONST, idx); +				return nvfx_sr(NVFXSR_CONST, idx);  		}  	} @@ -103,37 +103,37 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w)  	vpd->value[1] = y;  	vpd->value[2] = z;  	vpd->value[3] = w; -	return nv40_sr(NV40SR_CONST, idx); +	return nvfx_sr(NVFXSR_CONST, idx);  }  #define arith(cc,s,o,d,m,s0,s1,s2) \ -	nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) +	nv40_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2))  static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src)  {  	struct nvfx_vertex_program *vp = vpc->vp;  	uint32_t sr = 0;  	switch (src.type) { -	case NV40SR_TEMP: +	case NVFXSR_TEMP:  		sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT);  		sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);  		break; -	case NV40SR_INPUT: +	case NVFXSR_INPUT:  		sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<  		       NV40_VP_SRC_REG_TYPE_SHIFT);  		vp->ir |= (1 << src.index);  		hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);  		break; -	case NV40SR_CONST: +	case NVFXSR_CONST:  		sr |= (NV40_VP_SRC_REG_TYPE_CONST <<  		       NV40_VP_SRC_REG_TYPE_SHIFT);  		assert(vpc->vpi->const_index == -1 ||  		       vpc->vpi->const_index == src.index);  		vpc->vpi->const_index = src.index;  		break; -	case NV40SR_NONE: +	case NVFXSR_NONE:  		sr |= (NV40_VP_SRC_REG_TYPE_INPUT <<  		       NV40_VP_SRC_REG_TYPE_SHIFT);  		break; @@ -174,12 +174,12 @@ 
emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)  }  static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst)  {  	struct nvfx_vertex_program *vp = vpc->vp;  	switch (dst.type) { -	case NV40SR_TEMP: +	case NVFXSR_TEMP:  		hw[3] |= NV40_VP_INST_DEST_MASK;  		if (slot == 0) {  			hw[0] |= (dst.index << @@ -189,7 +189,7 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)  				  NV40_VP_INST_SCA_DEST_TEMP_SHIFT);  		}  		break; -	case NV40SR_OUTPUT: +	case NVFXSR_OUTPUT:  		switch (dst.index) {  		case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break;  		case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; @@ -255,9 +255,9 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)  static void  nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, -	      struct nv40_sreg dst, int mask, -	      struct nv40_sreg s0, struct nv40_sreg s1, -	      struct nv40_sreg s2) +	      struct nvfx_sreg dst, int mask, +	      struct nvfx_sreg s0, struct nvfx_sreg s1, +	      struct nvfx_sreg s2)  {  	struct nvfx_vertex_program *vp = vpc->vp;  	uint32_t *hw; @@ -269,7 +269,7 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,  	hw = vpc->vpi->data; -	hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); +	hw[0] |= (NVFX_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);  	hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |  		  (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |  		  (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | @@ -291,13 +291,13 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,  	emit_src(vpc, hw, 2, s2);  } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg  tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { -	struct nv40_sreg src; +	struct nvfx_sreg src;  	switch (fsrc->Register.File) {  	case TGSI_FILE_INPUT: -		src = nv40_sr(NV40SR_INPUT, 
fsrc->Register.Index); +		src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index);  		break;  	case TGSI_FILE_CONSTANT:  		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); @@ -322,9 +322,9 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {  	return src;  } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg  tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { -	struct nv40_sreg dst; +	struct nvfx_sreg dst;  	switch (fdst->Register.File) {  	case TGSI_FILE_OUTPUT: @@ -358,10 +358,10 @@ tgsi_mask(uint tgsi)  static boolean  src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, -	       struct nv40_sreg *src) +	       struct nvfx_sreg *src)  { -	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); -	struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); +	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); +	struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc);  	uint mask = 0;  	uint c; @@ -384,7 +384,7 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,  	*src = temp(vpc);  	if (mask) -		arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); +		arith(vpc, VEC, MOV, *src, mask, tgsi, none, none);  	return FALSE;  } @@ -393,8 +393,8 @@ static boolean  nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,  				const struct tgsi_full_instruction *finst)  { -	struct nv40_sreg src[3], dst, tmp; -	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nvfx_sreg src[3], dst, tmp; +	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);  	int mask;  	int ai = -1, ci = -1, ii = -1;  	int i; @@ -434,7 +434,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,  				src[i] = tgsi_src(vpc, fsrc);  			} else {  				src[i] = temp(vpc); -				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				arith(vpc, VEC, MOV, src[i], MASK_ALL,  				      tgsi_src(vpc, fsrc), none, none);  			}  			break; @@ -445,7 +445,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,  				src[i] = 
tgsi_src(vpc, fsrc);  			} else {  				src[i] = temp(vpc); -				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				arith(vpc, VEC, MOV, src[i], MASK_ALL,  				      tgsi_src(vpc, fsrc), none, none);  			}  			break; @@ -456,7 +456,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,  				src[i] = tgsi_src(vpc, fsrc);  			} else {  				src[i] = temp(vpc); -				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				arith(vpc, VEC, MOV, src[i], MASK_ALL,  				      tgsi_src(vpc, fsrc), none, none);  			}  			break; @@ -474,93 +474,93 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,  	switch (finst->Instruction.Opcode) {  	case TGSI_OPCODE_ABS: -		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); +		arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none);  		break;  	case TGSI_OPCODE_ADD: -		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); +		arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]);  		break;  	case TGSI_OPCODE_ARL: -		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); +		arith(vpc, VEC, ARL, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_DP3: -		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_DP4: -		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_DPH: -		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_DST: -		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, DST, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_EX2: -		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); +		arith(vpc, SCA, EX2, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_EXP: -		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); +		arith(vpc, SCA, EXP, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_FLR: 
-		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); +		arith(vpc, VEC, FLR, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_FRC: -		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); +		arith(vpc, VEC, FRC, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_LG2: -		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); +		arith(vpc, SCA, LG2, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_LIT: -		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); +		arith(vpc, SCA, LIT, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_LOG: -		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); +		arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_MAD: -		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); +		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);  		break;  	case TGSI_OPCODE_MAX: -		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_MIN: -		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_MOV: -		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); +		arith(vpc, VEC, MOV, dst, mask, src[0], none, none);  		break;  	case TGSI_OPCODE_MUL: -		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_POW:  		tmp = temp(vpc); -		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, +		arith(vpc, SCA, LG2, tmp, MASK_X, none, none,  		      swz(src[0], X, X, X, X)); -		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),  		      swz(src[1], X, X, X, X), none); -		arith(vpc, 1, OP_EX2, dst, mask, none, none, +		arith(vpc, SCA, EX2, dst, mask, none, none,  		      swz(tmp, X, X, X, X));  		break;  	case TGSI_OPCODE_RCP: -		arith(vpc, 1, OP_RCP, dst, 
mask, none, none, src[0]); +		arith(vpc, SCA, RCP, dst, mask, none, none, src[0]);  		break;  	case TGSI_OPCODE_RET:  		break;  	case TGSI_OPCODE_RSQ: -		arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); +		arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));  		break;  	case TGSI_OPCODE_SGE: -		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_SLT: -		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); +		arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);  		break;  	case TGSI_OPCODE_SUB: -		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); +		arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));  		break;  	case TGSI_OPCODE_XPD:  		tmp = temp(vpc); -		arith(vpc, 0, OP_MUL, tmp, mask, +		arith(vpc, VEC, MUL, tmp, mask,  		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); -		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), +		arith(vpc, VEC, MAD, dst, (mask & ~MASK_W),  		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),  		      neg(tmp));  		break; @@ -630,7 +630,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,  		return FALSE;  	} -	vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); +	vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);  	return TRUE;  } @@ -702,18 +702,18 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)  	tgsi_parse_free(&p);  	if (nr_imm) { -		vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); +		vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg));  		assert(vpc->imm);  	}  	if (++high_temp) { -		vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); +		vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));  		for (i = 0; i < high_temp; i++)  			vpc->r_temp[i] = temp(vpc);  	}  	if (++high_addr) { -		vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); +		vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg));  		for (i = 0; i < high_addr; i++)  			vpc->r_address[i] = 
temp(vpc);  	} @@ -728,7 +728,7 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,  {  	struct tgsi_parse_context parse;  	struct nv40_vpc *vpc = NULL; -	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);  	int i;  	vpc = CALLOC(1, sizeof(struct nv40_vpc)); @@ -785,24 +785,24 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,  	}  	/* Write out HPOS if it was redirected to a temp earlier */ -	if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { -		struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, +	if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { +		struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT,  						NV40_VP_INST_DEST_POS); -		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; +		struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; -		arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); +		arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none);  	}  	/* Insert code to handle user clip planes */  	for (i = 0; i < vp->ucp.nr; i++) { -		struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, +		struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT,  						NV40_VP_INST_DEST_CLIP(i)); -		struct nv40_sreg ceqn = constant(vpc, -1, +		struct nvfx_sreg ceqn = constant(vpc, -1,  						 nvfx->clip.ucp[i][0],  						 nvfx->clip.ucp[i][1],  						 nvfx->clip.ucp[i][2],  						 nvfx->clip.ucp[i][3]); -		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; +		struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx];  		unsigned mask;  		switch (i) { @@ -814,10 +814,10 @@ nv40_vertprog_translate(struct nvfx_context *nvfx,  			goto out_err;  		} -		arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); +		arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none);  	} -	vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; +	vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;  	vp->translated = TRUE;  out_err:  	tgsi_parse_free(&parse); diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h 
b/src/gallium/drivers/nvfx/nvfx_shader.h new file mode 100644 index 0000000000..191131a40a --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -0,0 +1,407 @@ +#ifndef __NVFX_SHADER_H__ +#define __NVFX_SHADER_H__ + +/* this will resolve to either the NV30 or the NV40 version + * depending on the current hardware */ +/* unusual, but very fast and compact method */ +#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c)))) + +#define NVFX_VP_INST_SLOT_VEC 0 +#define NVFX_VP_INST_SLOT_SCA 1 + +#define NVFX_VP_INST_COND_FL  0 /* guess */ +#define NVFX_VP_INST_COND_LT  1 +#define NVFX_VP_INST_COND_EQ  2 +#define NVFX_VP_INST_COND_LE  3 +#define NVFX_VP_INST_COND_GT  4 +#define NVFX_VP_INST_COND_NE  5 +#define NVFX_VP_INST_COND_GE  6 +#define NVFX_VP_INST_COND_TR  7 /* guess */ + +#define NVFX_VP_INST_IN_POS  0    /* These seem to match the bindings specified in */ +#define NVFX_VP_INST_IN_WEIGHT  1    /* the ARB_v_p spec (2.14.3.1) */ +#define NVFX_VP_INST_IN_NORMAL  2 +#define NVFX_VP_INST_IN_COL0  3    /* Should probably confirm them all though */ +#define NVFX_VP_INST_IN_COL1  4 +#define NVFX_VP_INST_IN_FOGC  5 +#define NVFX_VP_INST_IN_TC0  8 +/* argument parenthesized so expression arguments expand correctly */ +#define NVFX_VP_INST_IN_TC(n)  (8 + (n)) + +#define NVFX_VP_INST_SCA_OP_NOP 0x00 +#define NVFX_VP_INST_SCA_OP_MOV 0x01 +#define NVFX_VP_INST_SCA_OP_RCP 0x02 +#define NVFX_VP_INST_SCA_OP_RCC 0x03 +#define NVFX_VP_INST_SCA_OP_RSQ 0x04 +#define NVFX_VP_INST_SCA_OP_EXP 0x05 +#define NVFX_VP_INST_SCA_OP_LOG 0x06 +#define NVFX_VP_INST_SCA_OP_LIT 0x07 +#define NVFX_VP_INST_SCA_OP_BRA 0x09 +#define NVFX_VP_INST_SCA_OP_CAL 0x0B +#define NVFX_VP_INST_SCA_OP_RET 0x0C +#define NVFX_VP_INST_SCA_OP_LG2 0x0D +#define NVFX_VP_INST_SCA_OP_EX2 0x0E +#define NVFX_VP_INST_SCA_OP_SIN 0x0F +#define NVFX_VP_INST_SCA_OP_COS 0x10 + +#define NV40_VP_INST_SCA_OP_PUSHA 0x13 +#define NV40_VP_INST_SCA_OP_POPA 0x14 + +#define NVFX_VP_INST_VEC_OP_NOP 0x00 +#define NVFX_VP_INST_VEC_OP_MOV 0x01 +#define NVFX_VP_INST_VEC_OP_MUL 
0x02 +#define NVFX_VP_INST_VEC_OP_ADD 0x03 +#define NVFX_VP_INST_VEC_OP_MAD 0x04 +#define NVFX_VP_INST_VEC_OP_DP3 0x05 +#define NVFX_VP_INST_VEC_OP_DPH 0x06 +#define NVFX_VP_INST_VEC_OP_DP4 0x07 +#define NVFX_VP_INST_VEC_OP_DST 0x08 +#define NVFX_VP_INST_VEC_OP_MIN 0x09 +#define NVFX_VP_INST_VEC_OP_MAX 0x0A +#define NVFX_VP_INST_VEC_OP_SLT 0x0B +#define NVFX_VP_INST_VEC_OP_SGE 0x0C +#define NVFX_VP_INST_VEC_OP_ARL 0x0D +#define NVFX_VP_INST_VEC_OP_FRC 0x0E +#define NVFX_VP_INST_VEC_OP_FLR 0x0F +#define NVFX_VP_INST_VEC_OP_SEQ 0x10 +#define NVFX_VP_INST_VEC_OP_SFL 0x11 +#define NVFX_VP_INST_VEC_OP_SGT 0x12 +#define NVFX_VP_INST_VEC_OP_SLE 0x13 +#define NVFX_VP_INST_VEC_OP_SNE 0x14 +#define NVFX_VP_INST_VEC_OP_STR 0x15 +#define NVFX_VP_INST_VEC_OP_SSG 0x16 +#define NVFX_VP_INST_VEC_OP_ARR 0x17 +#define NVFX_VP_INST_VEC_OP_ARA 0x18 + +#define NV40_VP_INST_VEC_OP_TXL 0x19 + +/* DWORD 3 */ +#define NVFX_VP_INST_LAST                           (1 << 0) + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + *   0 - Opcode, output reg/mask, ATTRIB source + *   1 - Source 0 + *   2 - Source 1 + *   3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + *     result.color == R0.xyzw + *     result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords.  As such instructions such as: + * + *     ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. 
+ * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + *   TODO + * + * Non-native instructions: + *   LIT + *   LRP - MAD+MAD + *   SUB - ADD, negate second source + *   RSQ - LG2 + EX2 + *   POW - LG2 + MUL + EX2 + *   SCS - COS + SIN + *   XPD + * + * NV40 Looping + *   Loops appear to be fairly expensive on NV40 at least, the proprietary + *   driver goes to a lot of effort to avoid using the native looping + *   instructions.  If the total number of *executed* instructions between + *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + *   The maximum loop count is 255. + * + */ + +//== Opcode / Destination selection == +#define NVFX_FP_OP_PROGRAM_END          (1 << 0) +#define NVFX_FP_OP_OUT_REG_SHIFT        1 +#define NV30_FP_OP_OUT_REG_MASK          (31 << 1)  /* uncertain */ +#define NV40_FP_OP_OUT_REG_MASK          (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NVFX_FP_OP_OUT_REG_HALF          (1 << 7) +#define NVFX_FP_OP_COND_WRITE_ENABLE        (1 << 8) +#define NVFX_FP_OP_OUTMASK_SHIFT        9 +#define NVFX_FP_OP_OUTMASK_MASK          (0xF << 9) +#  define NVFX_FP_OP_OUT_X  (1<<9) +#  define NVFX_FP_OP_OUT_Y  (1<<10) +#  define NVFX_FP_OP_OUT_Z  (1<<11) +#  define NVFX_FP_OP_OUT_W  (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. 
+ */ +#define NVFX_FP_OP_INPUT_SRC_SHIFT        13 +#define NVFX_FP_OP_INPUT_SRC_MASK        (15 << 13) +#  define NVFX_FP_OP_INPUT_SRC_POSITION  0x0 +#  define NVFX_FP_OP_INPUT_SRC_COL0  0x1 +#  define NVFX_FP_OP_INPUT_SRC_COL1  0x2 +#  define NVFX_FP_OP_INPUT_SRC_FOGC  0x3 +#  define NVFX_FP_OP_INPUT_SRC_TC0    0x4 +/* argument parenthesized so expression arguments expand correctly */ +#  define NVFX_FP_OP_INPUT_SRC_TC(n)  (0x4 + (n)) +#  define NV40_FP_OP_INPUT_SRC_FACING  0xE +#define NVFX_FP_OP_TEX_UNIT_SHIFT        17 +#define NVFX_FP_OP_TEX_UNIT_MASK        (0xF << 17) /* guess */ +#define NVFX_FP_OP_PRECISION_SHIFT        22 +#define NVFX_FP_OP_PRECISION_MASK        (3 << 22) +#   define NVFX_FP_PRECISION_FP32  0 +#   define NVFX_FP_PRECISION_FP16  1 +#   define NVFX_FP_PRECISION_FX12  2 +#define NVFX_FP_OP_OPCODE_SHIFT          24 +#define NVFX_FP_OP_OPCODE_MASK          (0x3F << 24) +/* NV30/NV40 fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_NOP 0x00 +#define NVFX_FP_OP_OPCODE_MOV 0x01 +#define NVFX_FP_OP_OPCODE_MUL 0x02 +#define NVFX_FP_OP_OPCODE_ADD 0x03 +#define NVFX_FP_OP_OPCODE_MAD 0x04 +#define NVFX_FP_OP_OPCODE_DP3 0x05 +#define NVFX_FP_OP_OPCODE_DP4 0x06 +#define NVFX_FP_OP_OPCODE_DST 0x07 +#define NVFX_FP_OP_OPCODE_MIN 0x08 +#define NVFX_FP_OP_OPCODE_MAX 0x09 +#define NVFX_FP_OP_OPCODE_SLT 0x0A +#define NVFX_FP_OP_OPCODE_SGE 0x0B +#define NVFX_FP_OP_OPCODE_SLE 0x0C +#define NVFX_FP_OP_OPCODE_SGT 0x0D +#define NVFX_FP_OP_OPCODE_SNE 0x0E +#define NVFX_FP_OP_OPCODE_SEQ 0x0F +#define NVFX_FP_OP_OPCODE_FRC 0x10 +#define NVFX_FP_OP_OPCODE_FLR 0x11 +#define NVFX_FP_OP_OPCODE_KIL 0x12 +#define NVFX_FP_OP_OPCODE_PK4B 0x13 +#define NVFX_FP_OP_OPCODE_UP4B 0x14 +#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_TEX 0x17 +#define NVFX_FP_OP_OPCODE_TXP 0x18 +#define NVFX_FP_OP_OPCODE_TXD 0x19 +#define NVFX_FP_OP_OPCODE_RCP 0x1A +#define NVFX_FP_OP_OPCODE_EX2 0x1C +#define NVFX_FP_OP_OPCODE_LG2 0x1D +#define NVFX_FP_OP_OPCODE_STR 
0x20 +#define NVFX_FP_OP_OPCODE_SFL 0x21 +#define NVFX_FP_OP_OPCODE_COS 0x22 +#define NVFX_FP_OP_OPCODE_SIN 0x23 +#define NVFX_FP_OP_OPCODE_PK2H 0x24 +#define NVFX_FP_OP_OPCODE_UP2H 0x25 +#define NVFX_FP_OP_OPCODE_PK4UB 0x27 +#define NVFX_FP_OP_OPCODE_UP4UB 0x28 +#define NVFX_FP_OP_OPCODE_PK2US 0x29 +#define NVFX_FP_OP_OPCODE_UP2US 0x2A +#define NVFX_FP_OP_OPCODE_DP2A 0x2E +#define NVFX_FP_OP_OPCODE_TXB 0x31 +#define NVFX_FP_OP_OPCODE_DIV 0x3A + +/* NV30 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B +#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E +#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F +#define NVFX_FP_OP_OPCODE_POW_NV30 0x26 +#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36 + +/* NV40 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31 +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0 +#define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1 +#define NV40_FP_OP_BRA_OPCODE_IF                                     0x2 +#define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3 +#define NV40_FP_OP_BRA_OPCODE_REP                                    0x4 +#define NV40_FP_OP_BRA_OPCODE_RET                                    0x5 + +#define NVFX_FP_OP_OUT_SAT          (1 << 31) + +/* high order bits of SRC0 */ +#define NVFX_FP_OP_OUT_ABS          (1 << 29) +#define NVFX_FP_OP_COND_SWZ_W_SHIFT        27 +#define NVFX_FP_OP_COND_SWZ_W_MASK        (3 << 27) +#define NVFX_FP_OP_COND_SWZ_Z_SHIFT        25 +#define NVFX_FP_OP_COND_SWZ_Z_MASK        (3 << 25) +#define NVFX_FP_OP_COND_SWZ_Y_SHIFT        23 +#define NVFX_FP_OP_COND_SWZ_Y_MASK        (3 << 23) +#define NVFX_FP_OP_COND_SWZ_X_SHIFT        21 +#define NVFX_FP_OP_COND_SWZ_X_MASK        (3 << 21) +#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT        21 +#define NVFX_FP_OP_COND_SWZ_ALL_MASK        (0xFF << 21) +#define NVFX_FP_OP_COND_SHIFT          18 +#define 
NVFX_FP_OP_COND_MASK          (0x07 << 18) +#  define NVFX_FP_OP_COND_FL  0 +#  define NVFX_FP_OP_COND_LT  1 +#  define NVFX_FP_OP_COND_EQ  2 +#  define NVFX_FP_OP_COND_LE  3 +#  define NVFX_FP_OP_COND_GT  4 +#  define NVFX_FP_OP_COND_NE  5 +#  define NVFX_FP_OP_COND_GE  6 +#  define NVFX_FP_OP_COND_TR  7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31) +#define NVFX_FP_OP_DST_SCALE_SHIFT        28 +#define NVFX_FP_OP_DST_SCALE_MASK        (3 << 28) +#define NVFX_FP_OP_DST_SCALE_1X                                                0 +#define NVFX_FP_OP_DST_SCALE_2X                                                1 +#define NVFX_FP_OP_DST_SCALE_4X                                                2 +#define NVFX_FP_OP_DST_SCALE_8X                                                3 +#define NVFX_FP_OP_DST_SCALE_INV_2X                                            5 +#define NVFX_FP_OP_DST_SCALE_INV_4X                                            6 +#define NVFX_FP_OP_DST_SCALE_INV_8X                                            7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19 +#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10 +#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2 +#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT                                               2 +#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT                                                 2 +#define NV40_FP_OP_IADDR_MASK                                        (0xFF << 2) + +/* SRC1 REP + *   I have no 
idea why there are 3 count values here..  but they + *   have always been filled with the same value in my tests so + *   far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2 +#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10 +#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19 +#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT                                                2 +#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2) + +/* high order bits of SRC2 */ +#define NVFX_FP_OP_INDEX_INPUT          (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT        19 +#define NV40_FP_OP_ADDR_INDEX_MASK        (0xF << 19) + +//== Register selection == +#define NVFX_FP_REG_TYPE_SHIFT           0 +#define NVFX_FP_REG_TYPE_MASK           (3 << 0) +#  define NVFX_FP_REG_TYPE_TEMP   0 +#  define NVFX_FP_REG_TYPE_INPUT  1 +#  define NVFX_FP_REG_TYPE_CONST  2 +#define NVFX_FP_REG_SRC_SHIFT            2 +#define NV30_FP_REG_SRC_MASK              (31 << 2) +#define NV40_FP_REG_SRC_MASK              (63 << 2) +#define NVFX_FP_REG_SRC_HALF            (1 << 8) +#define NVFX_FP_REG_SWZ_ALL_SHIFT        9 +#define NVFX_FP_REG_SWZ_ALL_MASK        (255 << 9) +#define NVFX_FP_REG_SWZ_X_SHIFT          9 +#define NVFX_FP_REG_SWZ_X_MASK          (3 << 9) +#define NVFX_FP_REG_SWZ_Y_SHIFT          11 +#define NVFX_FP_REG_SWZ_Y_MASK          (3 << 11) +#define NVFX_FP_REG_SWZ_Z_SHIFT          13 +#define NVFX_FP_REG_SWZ_Z_MASK          (3 << 13) +#define NVFX_FP_REG_SWZ_W_SHIFT          15 +#define NVFX_FP_REG_SWZ_W_MASK          (3 << 15) +#  define NVFX_FP_SWIZZLE_X  0 +#  define NVFX_FP_SWIZZLE_Y  1 +#  define 
NVFX_FP_SWIZZLE_Z  2 +#  define NVFX_FP_SWIZZLE_W  3 +#define NVFX_FP_REG_NEGATE          (1 << 17) + +#ifndef NVFX_SHADER_NO_FUCKEDNESS +#define NVFXSR_NONE	0 +#define NVFXSR_OUTPUT	1 +#define NVFXSR_INPUT	2 +#define NVFXSR_TEMP	3 +#define NVFXSR_CONST	4 + +struct nvfx_sreg { +	int type; +	int index; + +	int dst_scale; + +	int negate; +	int abs; +	int swz[4]; + +	int cc_update; +	int cc_update_reg; +	int cc_test; +	int cc_test_reg; +	int cc_swz[4]; +}; + +static INLINE struct nvfx_sreg +nvfx_sr(int type, int index) +{ +	struct nvfx_sreg temp = { +		.type = type, +		.index = index, +		.dst_scale = DEF_SCALE, +		.abs = 0, +		.negate = 0, +		.swz = { 0, 1, 2, 3 }, +		.cc_update = 0, +		.cc_update_reg = 0, +		.cc_test = DEF_CTEST, +		.cc_test_reg = 0, +		.cc_swz = { 0, 1, 2, 3 }, +	}; +	return temp; +} + +static INLINE struct nvfx_sreg +nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) +{ +	struct nvfx_sreg dst = src; + +	dst.swz[SWZ_X] = src.swz[x]; +	dst.swz[SWZ_Y] = src.swz[y]; +	dst.swz[SWZ_Z] = src.swz[z]; +	dst.swz[SWZ_W] = src.swz[w]; +	return dst; +} + +static INLINE struct nvfx_sreg +nvfx_sr_neg(struct nvfx_sreg src) +{ +	src.negate = !src.negate; +	return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_abs(struct nvfx_sreg src) +{ +	src.abs = 1; +	return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_scale(struct nvfx_sreg src, int scale) +{ +	src.dst_scale = scale; +	return src; +} +#endif + +#endif | 
