/*
 * Copyright (C) 2005 Ben Skeggs.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "r300_fragprog.h"

#include "shader/prog_parameter.h"

#include "../r300_reg.h"

static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
	struct prog_src_register reg = { 0, };

	reg.File = PROGRAM_STATE_VAR;
	reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
	reg.Swizzle = SWIZZLE_WWWW;
	return reg;
}

/**
 * Transform TEX, TXP, TXB, and KIL instructions in the following way:
 *  - emulate the ARB_shadow / EXT_shadow_funcs depth comparison for
 *    units flagged in Program.ShadowSamplers
 *  - premultiply texture coordinates for RECT
 *  - extract operand swizzles
 *  - introduce a temporary register when write masks are needed
 *  - copy the coordinate into a temporary when it is read from a file
 *    other than PROGRAM_TEMPORARY or PROGRAM_INPUT
 *
 * \param c     compiler owning the instruction list; new instructions and
 *              temporaries are allocated through it
 * \param inst  instruction to examine; it is rewritten in place and may get
 *              new neighbours inserted before and after it
 * \param data  must point to the struct r300_fragment_program_compiler whose
 *              per-unit state (compare function, depth texture mode) is used
 * \return GL_FALSE if \p inst is not one of the four opcodes above (nothing
 *         is changed in that case), GL_TRUE otherwise
 */
GLboolean r300_transform_TEX(
	struct radeon_compiler * c,
	struct rc_instruction* inst,
	void* data)
{
	struct r300_fragment_program_compiler *compiler =
		(struct r300_fragment_program_compiler*)data;

	if (inst->I.Opcode != OPCODE_TEX &&
	    inst->I.Opcode != OPCODE_TXB &&
	    inst->I.Opcode != OPCODE_TXP &&
	    inst->I.Opcode != OPCODE_KIL)
		return GL_FALSE;

	/* ARB_shadow & EXT_shadow_funcs */
	if (inst->I.Opcode != OPCODE_KIL &&
	    c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
		/* The state stores the compare function as an offset from GL_NEVER. */
		GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;

		if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
			/* The outcome does not depend on the texture, so the
			 * lookup degenerates into a MOV of a constant. */
			inst->I.Opcode = OPCODE_MOV;

			if (comparefunc == GL_ALWAYS) {
				inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
				inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
			} else {
				inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
			}

			return GL_TRUE;
		} else {
			GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
			/* Resulting order: inst (texture lookup), RCP, MAD, CMP. */
			struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
			struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
			struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
			int pass, fail;

			/* temp.w = 1 / coord.w */
			inst_rcp->I.Opcode = OPCODE_RCP;
			inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
			inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
			inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
			inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
			inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;

			/* The CMP takes over the original destination; the
			 * texture lookup itself now writes a fresh temporary. */
			inst_cmp->I.DstReg = inst->I.DstReg;
			inst->I.DstReg.File = PROGRAM_TEMPORARY;
			inst->I.DstReg.Index = rc_find_free_temporary(c);
			inst->I.DstReg.WriteMask = WRITEMASK_XYZW;

			inst_mad->I.Opcode = OPCODE_MAD;
			inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
			inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
			inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
			inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
			inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
			inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
			inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
			inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
			inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
			/* NOTE(review): for any other depthmode the swizzle is
			 * left as rc_insert_new_instruction initialised it. */
			if (depthmode == 0) /* GL_LUMINANCE */
				inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
			else if (depthmode == 2) /* GL_ALPHA */
				inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;

			/* Recall that SrcReg[0] is r (the coordinate's z),
			 * SrcReg[1] is 1/w, SrcReg[2] is tex (the sampled
			 * texel) and:
			 *   r  < tex  <=>      (-tex+r < 0)
			 *   r >= tex  <=> not  (-tex+r < 0)
			 * So LESS/GEQUAL negate tex, everything else negates r.
			 * NOTE(review): GL_EQUAL and GL_NOTEQUAL, if they can
			 * occur here, take the same path as GL_LEQUAL. */
			if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
				inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
			else
				inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;

			inst_cmp->I.Opcode = OPCODE_CMP;
			/* DstReg has been filled out above */
			inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
			inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;

			/* pass/fail are the CMP operand slots that receive 1.0
			 * and the shadow ambient value respectively. */
			if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
				pass = 1;
				fail = 2;
			} else {
				pass = 2;
				fail = 1;
			}

			inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
			inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
			inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
		}
	}

	/* Hardware uses [0..1]x[0..1] range for rectangle textures
	 * instead of [0..Width]x[0..Height].
	 * Add a scaling instruction.
	 */
	if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
		/* Inserted after inst->Prev, i.e. directly in front of inst. */
		struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);

		inst_mul->I.Opcode = OPCODE_MUL;
		inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
		inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
		inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
		inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
		inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);

		/* The lookup now reads the scaled coordinate. */
		reset_srcreg(&inst->I.SrcReg[0]);
		inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
		inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
	}

	/* Cannot write texture to output registers or with masks */
	if (inst->I.Opcode != OPCODE_KIL &&
	    (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
		/* Sample into a fresh temporary and MOV it to the real
		 * destination (with its original write mask) afterwards. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);

		inst_mov->I.Opcode = OPCODE_MOV;
		inst_mov->I.DstReg = inst->I.DstReg;
		inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
		inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);

		inst->I.DstReg.File = PROGRAM_TEMPORARY;
		inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
		inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
	}


	/* Cannot read texture coordinate from constants file */
	if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
		/* Copy the coordinate into a temporary right before inst. */
		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);

		inst_mov->I.Opcode = OPCODE_MOV;
		inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
		inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
		inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];

		reset_srcreg(&inst->I.SrcReg[0]);
		inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
		inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
	}

	return GL_TRUE;
}

/* just some random things... */
void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
{
	struct r300_fragment_program_code *code = &c->code.r300;
	int n, i, j;
	static int pc = 0;

	fprintf(stderr, "pc=%d*************************************\n", pc++);

	fprintf(stderr, "Hardware program\n");
	fprintf(stderr, "----------------\n");

	for (n = 0; n <= (code->config & 3); n++) {
		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
		int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
		int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;

		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
			"alu_end: %d, tex_end: %d  (code_addr: %08x)\n", n,
			alu_offset, tex_offset, alu_end, tex_end, code_addr);

		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
			fprintf(stderr, "  TEX:\n");
			for (i = tex_offset;
			     i <= tex_offset + tex_end;
			     ++i) {
				const char *instr;

				switch ((code->tex.
					 inst[i] >> R300_TEX_INST_SHIFT) &
					15) {
				case R300_TEX_OP_LD:
					instr = "TEX";
					break;
				case R300_TEX_OP_KIL:
					instr = "KIL";
					break;
				case R300_TEX_OP_TXP:
					instr = "TXP";
					break;
				case R300_TEX_OP_TXB:
					instr = "TXB";
					break;
				default:
					instr = "UNKNOWN";
				}

				fprintf(stderr,
					"    %s t%i, %c%i, texture[%i]   (%08x)\n",
					instr,
					(code->tex.
					 inst[i] >> R300_DST_ADDR_SHIFT) & 31,
					't',
					(code->tex.
					 inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
					(code->tex.
					 inst[i] & R300_TEX_ID_MASK) >>
					R300_TEX_ID_SHIFT,
					code->tex.inst[i]);
			}
		}

		for (i = alu_offset;
		     i <= alu_offset + alu_end; ++i) {
			char srcc[3][10], dstc[20];
			char srca[3][10], dsta[20];
			char argc[3][20];
			char arga[3][20];
			char flags[5], tmp[10];

			for (j = 0; j < 3; ++j) {
				int regc = code->alu.inst[i].rgb_addr >> (j * 6);
				int rega = code->alu.inst[i].alpha_addr >> (j * 6);

				sprintf(srcc[j], "%c%i",
					(regc & 32) ? 'c' : 't', regc & 31);
				sprintf(srca[j], "%c%i",
					(rega & 32) ? 'c' : 't', rega & 31);
			}

			dstc[0] = 0;
			sprintf(flags, "%s%s%s",
				(code->alu.inst[i].
				 rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
				(code->alu.inst[i].
				 rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
				(code->alu.inst[i].
				 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
			if (flags[0] != 0) {
				sprintf(dstc, "t%i.%s ",
					(code->alu.inst[i].
					 rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
					flags);
			}
			sprintf(flags, "%s%s%s",
				(code->alu.inst[i].
				 rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
				(code->alu.inst[i].
				 rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
				(code->alu.inst[i].
				 rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
			if (flags[0] != 0) {
				sprintf(tmp, "o%i.%s",
					(code->alu.inst[i].
					 rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
					flags);
				strcat(dstc, tmp);
			}

			dsta[0] = 0;
			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
				sprintf(dsta, "t%i.w ",
					(code->alu.inst[i].
					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
			}
			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
				sprintf(tmp, "o%i.w ",
					(code->alu.inst[i].
					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
				strcat(dsta, tmp);
			}
			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
				strcat(dsta, "Z");
			}

			fprintf(stderr,
				"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
				"       w: %3s %3s %3s -> %-20s (%08x)\n", i,
				srcc[0], srcc[1], srcc[2], dstc,
				code->alu.inst[i].rgb_addr, srca[0], srca[1],
				srca[2], dsta, code->alu.inst[i].alpha_addr);

			for (j = 0; j < 3; ++j) {
				int regc = code->alu.inst[i].rgb_inst >> (j * 7);
				int rega = code->alu.inst[i].alpha_inst >> (j * 7);
				int d;
				char buf[20];

				d = regc & 31;
				if (d < 12) {
					switch (d % 4) {
					case R300_ALU_ARGC_SRC0C_XYZ:
						sprintf(buf, "%s.xyz",
							srcc[d / 4]);
						break;
					case R300_ALU_ARGC_SRC0C_XXX:
						sprintf(buf, "%s.xxx",
							srcc[d / 4]);
						break;
					case R300_ALU_ARGC_SRC0C_YYY:
						sprintf(buf, "%s.yyy",
							srcc[d / 4]);
						break;
					case R300_ALU_ARGC_SRC0C_ZZZ:
						sprintf(buf, "%s.zzz",
							srcc[d / 4]);
						break;
					}
				} else if (d < 15) {
					sprintf(buf, "%s.www", srca[d - 12]);
				} else if (d == 20) {
					sprintf(buf, "0.0");
				} else if (d == 21) {
					sprintf(buf, "1.0");
				} else if (d == 22) {
					sprintf(buf, "0.5");
				} else if (d >= 23 && d < 32) {
					d -= 23;
					switch (d / 3) {
					case 0:
						sprintf(buf, "%s.yzx",
							srcc[d % 3]);
						break;
					case 1:
						sprintf(buf, "%s.zxy",
							srcc[d % 3]);
						break;
					case 2:
						sprintf(buf, "%s.Wzy",
							srcc[d % 3]);
						break;
					}
				} else {
					sprintf(buf, "%i", d);
				}

				sprintf(argc[j], "%s%s%s%s",
					(regc & 32) ? "-" : "",
					(regc & 64) ? "|" : "",
					buf, (regc & 64) ? "|" : "");

				d = rega & 31;
				if (d < 9) {
					sprintf(buf, "%s.%c", srcc[d / 3],
						'x' + (char)(d % 3));
				} else if (d < 12) {
					sprintf(buf, "%s.w", srca[d - 9]);
				} else if (d == 16) {
					sprintf(buf, "0.0");
				} else if (d == 17) {
					sprintf(buf, "1.0");
				} else if (d == 18) {
					sprintf(buf, "0.5");
				} else {
					sprintf(buf, "%i", d);
				}

				sprintf(arga[j], "%s%s%s%s",
					(rega & 32) ? "-" : "",
					(rega & 64) ? "|" : "",
					buf, (rega & 64) ? "|" : "");
			}

			fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x\n"
				"       w: %8s %8s %8s    op: %08x\n",
				argc[0], argc[1], argc[2],
				code->alu.inst[i].rgb_inst, arga[0], arga[1],
				arga[2], code->alu.inst[i].alpha_inst);
		}
	}
}