diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 64 | ||||
| -rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/r600/r600_sq.h | 2 | 
3 files changed, 63 insertions, 5 deletions
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 9091317ada..e2d52c3a46 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -108,7 +108,6 @@ static struct r600_bc_alu *r600_bc_alu(void)  	if (alu == NULL)  		return NULL;  	LIST_INITHEAD(&alu->list); -	LIST_INITHEAD(&alu->bs_list);  	return alu;  } @@ -560,6 +559,63 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al  	return -1;  } +static int replace_gpr_with_pv_ps(struct r600_bc_alu *alu_first, struct r600_bc_alu *alu_prev) +{ +	struct r600_bc_alu *slots[5]; +	int gpr[5], chan[5]; +	int i, j, r, src, num_src; +	 +	r = assign_alu_units(alu_prev, slots); +	if (r) +		return r; + +	for (i = 0; i < 5; ++i) { +		if(slots[i] && slots[i]->dst.write && !slots[i]->dst.rel) { +			gpr[i] = slots[i]->dst.sel; +			if (is_alu_reduction_inst(slots[i])) +				chan[i] = 0; +			else +				chan[i] = slots[i]->dst.chan; +		} else +			gpr[i] = -1; +		 +	} + +	r = assign_alu_units(alu_first, slots); +	if (r) +		return r; + +	for (i = 0; i < 5; ++i) { +		struct r600_bc_alu *alu = slots[i]; +		if(!alu) +			continue; + +		num_src = r600_bc_get_num_operands(alu); +		for (src = 0; src < num_src; ++src) { +			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) +				continue; + +			if (alu->src[src].sel == gpr[4] && +				alu->src[src].chan == chan[4]) { +				alu->src[src].sel = V_SQ_ALU_SRC_PS; +				alu->src[src].chan = 0; +				continue; +			} + +			for (j = 0; j < 4; ++j) { +				if (alu->src[src].sel == gpr[j] && +					alu->src[src].chan == j) { +					alu->src[src].sel = V_SQ_ALU_SRC_PV; +					alu->src[src].chan = chan[j]; +					break; +				} +			} +		} +	} + +	return 0; +} +  /* This code handles kcache lines as single blocks of 32 constants. We could   * probably do slightly better by recognizing that we actually have two   * consecutive lines of 16 constants, but the resulting code would also be @@ -718,9 +774,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int  	if (!bc->cf_last->curr_bs_head) {  		bc->cf_last->curr_bs_head = nalu; -		LIST_INITHEAD(&nalu->bs_list); -	} else { -		LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);  	}  	/* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)  	 * worst case */ @@ -757,9 +810,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int  	/* process cur ALU instructions for bank swizzle */  	if (nalu->last) { +		if (bc->cf_last->prev_bs_head) +			replace_gpr_with_pv_ps(bc->cf_last->curr_bs_head, bc->cf_last->prev_bs_head);  		r = check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);  		if (r)  			return r; +		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;  		bc->cf_last->curr_bs_head = NULL;  	}  	return 0; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index a9123299f1..2a046d1e88 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -46,7 +46,6 @@ struct r600_bc_alu_dst {  struct r600_bc_alu {  	struct list_head		list; -	struct list_head		bs_list; /* bank swizzle list */  	struct r600_bc_alu_src		src[3];  	struct r600_bc_alu_dst		dst;  	unsigned			inst; @@ -144,6 +143,7 @@ struct r600_bc_cf {  	struct list_head		vtx;  	struct r600_bc_output		output;  	struct r600_bc_alu		*curr_bs_head; +	struct r600_bc_alu		*prev_bs_head;  };  #define FC_NONE				0 diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index d812bfd1fe..56ed35e8b3 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -191,6 +191,8 @@  #define     V_SQ_ALU_SRC_M_1_INT                                     0x000000FB  #define     V_SQ_ALU_SRC_0_5                                         0x000000FC  #define     V_SQ_ALU_SRC_LITERAL                                     0x000000FD +#define     V_SQ_ALU_SRC_PV                                          0x000000FE +#define     V_SQ_ALU_SRC_PS                                          0x000000FF  #define     V_SQ_ALU_SRC_PARAM_BASE                                  0x000001C0  #define   S_SQ_ALU_WORD0_SRC0_REL(x)                                 (((x) & 0x1) << 9)  #define   G_SQ_ALU_WORD0_SRC0_REL(x)                                 (((x) >> 9) & 0x1)  | 
