summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt1127
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c146
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.h5
6 files changed, 182 insertions, 1163 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
deleted file mode 100644
index 5d9eed9258..0000000000
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ /dev/null
@@ -1,1127 +0,0 @@
-TGSI Instruction Specification
-==============================
-==============================
-
-
-1 Instruction Set Operations
-=============================
-
-
-1.1 GL_NV_vertex_program
--------------------------
-
-
-1.1.1 ARL - Address Register Load
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
- dst.w = floor(src.w)
-
-
-1.1.2 MOV - Move
-
- dst.x = src.x
- dst.y = src.y
- dst.z = src.z
- dst.w = src.w
-
-
-1.1.3 LIT - Light Coefficients
-
- dst.x = 1.0
- dst.y = max(src.x, 0.0)
- dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
- dst.w = 1.0
-
-
-1.1.4 RCP - Reciprocal
-
- dst.x = 1.0 / src.x
- dst.y = 1.0 / src.x
- dst.z = 1.0 / src.x
- dst.w = 1.0 / src.x
-
-
-1.1.5 RSQ - Reciprocal Square Root
-
- dst.x = 1.0 / sqrt(abs(src.x))
- dst.y = 1.0 / sqrt(abs(src.x))
- dst.z = 1.0 / sqrt(abs(src.x))
- dst.w = 1.0 / sqrt(abs(src.x))
-
-
-1.1.6 EXP - Approximate Exponential Base 2
-
- dst.x = pow(2.0, floor(src.x))
- dst.y = src.x - floor(src.x)
- dst.z = pow(2.0, src.x)
- dst.w = 1.0
-
-
-1.1.7 LOG - Approximate Logarithm Base 2
-
- dst.x = floor(lg2(abs(src.x)))
- dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
- dst.z = lg2(abs(src.x))
- dst.w = 1.0
-
-
-1.1.8 MUL - Multiply
-
- dst.x = src0.x * src1.x
- dst.y = src0.y * src1.y
- dst.z = src0.z * src1.z
- dst.w = src0.w * src1.w
-
-
-1.1.9 ADD - Add
-
- dst.x = src0.x + src1.x
- dst.y = src0.y + src1.y
- dst.z = src0.z + src1.z
- dst.w = src0.w + src1.w
-
-
-1.1.10 DP3 - 3-component Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
- dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
- dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
- dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
-
-
-1.1.11 DP4 - 4-component Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
- dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
- dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
- dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
-
-
-1.1.12 DST - Distance Vector
-
- dst.x = 1.0
- dst.y = src0.y * src1.y
- dst.z = src0.z
- dst.w = src1.w
-
-
-1.1.13 MIN - Minimum
-
- dst.x = min(src0.x, src1.x)
- dst.y = min(src0.y, src1.y)
- dst.z = min(src0.z, src1.z)
- dst.w = min(src0.w, src1.w)
-
-
-1.1.14 MAX - Maximum
-
- dst.x = max(src0.x, src1.x)
- dst.y = max(src0.y, src1.y)
- dst.z = max(src0.z, src1.z)
- dst.w = max(src0.w, src1.w)
-
-
-1.1.15 SLT - Set On Less Than
-
- dst.x = (src0.x < src1.x) ? 1.0 : 0.0
- dst.y = (src0.y < src1.y) ? 1.0 : 0.0
- dst.z = (src0.z < src1.z) ? 1.0 : 0.0
- dst.w = (src0.w < src1.w) ? 1.0 : 0.0
-
-
-1.1.16 SGE - Set On Greater Equal Than
-
- dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
- dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
- dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
- dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
-
-
-1.1.17 MAD - Multiply And Add
-
- dst.x = src0.x * src1.x + src2.x
- dst.y = src0.y * src1.y + src2.y
- dst.z = src0.z * src1.z + src2.z
- dst.w = src0.w * src1.w + src2.w
-
-
-1.2 GL_ATI_fragment_shader
----------------------------
-
-
-1.2.1 SUB - Subtract
-
- dst.x = src0.x - src1.x
- dst.y = src0.y - src1.y
- dst.z = src0.z - src1.z
- dst.w = src0.w - src1.w
-
-
-1.2.2 DOT3 - 3-component Dot Product
-
- Alias for DP3.
-
-
-1.2.3 DOT4 - 4-component Dot Product
-
- Alias for DP4.
-
-
-1.2.4 LERP - Linear Interpolate
-
- dst.x = src0.x * (src1.x - src2.x) + src2.x
- dst.y = src0.y * (src1.y - src2.y) + src2.y
- dst.z = src0.z * (src1.z - src2.z) + src2.z
- dst.w = src0.w * (src1.w - src2.w) + src2.w
-
-
-1.2.5 CND - Condition
-
- dst.x = (src2.x > 0.5) ? src0.x : src1.x
- dst.y = (src2.y > 0.5) ? src0.y : src1.y
- dst.z = (src2.z > 0.5) ? src0.z : src1.z
- dst.w = (src2.w > 0.5) ? src0.w : src1.w
-
-
-1.2.6 CND0 - Condition Zero
-
- Removed. Use (CMP src2, src1, src0) instead.
-
-1.2.7 DOT2ADD - 2-component Dot Product And Add
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
- dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
- dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
- dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
-
-
-1.3 GL_EXT_vertex_shader
--------------------------
-
-
-1.3.1 INDEX - Array Lookup
-
- Considered for removal from language.
-
-
-1.3.2 NEGATE - Negate
-
- Considered for removal from language.
-
-
-1.3.3 MADD - Multiply And Add
-
- Alias for MAD.
-
-
-1.3.4 FRAC - Fraction
-
- dst.x = src.x - floor(src.x)
- dst.y = src.y - floor(src.y)
- dst.z = src.z - floor(src.z)
- dst.w = src.w - floor(src.w)
-
-
-1.3.5 SETGE - Set On Greater Equal
-
- Alias for SGE.
-
-
-1.3.6 SETLT - Set On Less Than
-
- Alias for SLT.
-
-
-1.3.7 CLAMP - Clamp
-
- dst.x = clamp(src0.x, src1.x, src2.x)
- dst.y = clamp(src0.y, src1.y, src2.y)
- dst.z = clamp(src0.z, src1.z, src2.z)
- dst.w = clamp(src0.w, src1.w, src2.w)
-
-
-1.3.8 FLOOR - Floor
-
- dst.x = floor(src.x)
- dst.y = floor(src.y)
- dst.z = floor(src.z)
- dst.w = floor(src.w)
-
-
-1.3.9 ROUND - Round
-
- dst.x = round(src.x)
- dst.y = round(src.y)
- dst.z = round(src.z)
- dst.w = round(src.w)
-
-
-1.3.10 EXPBASE2 - Exponential Base 2
-
- dst.x = pow(2.0, src.x)
- dst.y = pow(2.0, src.x)
- dst.z = pow(2.0, src.x)
- dst.w = pow(2.0, src.x)
-
-
-1.3.11 LOGBASE2 - Logarithm Base 2
-
- dst.x = lg2(src.x)
- dst.y = lg2(src.x)
- dst.z = lg2(src.x)
- dst.w = lg2(src.x)
-
-
-1.3.12 POWER - Power
-
- dst.x = pow(src0.x, src1.x)
- dst.y = pow(src0.x, src1.x)
- dst.z = pow(src0.x, src1.x)
- dst.w = pow(src0.x, src1.x)
-
-
-1.3.13 RECIP - Reciprocal
-
- Alias for RCP.
-
-
-1.3.14 RECIPSQRT - Reciprocal Square Root
-
- Alias for RSQ.
-
-
-1.3.15 CROSSPRODUCT - Cross Product
-
- dst.x = src0.y * src1.z - src1.y * src0.z
- dst.y = src0.z * src1.x - src1.z * src0.x
- dst.z = src0.x * src1.y - src1.x * src0.y
- dst.w = 1.0
-
-
-1.3.16 MULTIPLYMATRIX - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.4 GL_NV_vertex_program1_1
-----------------------------
-
-
-1.4.1 ABS - Absolute
-
- dst.x = abs(src.x)
- dst.y = abs(src.y)
- dst.z = abs(src.z)
- dst.w = abs(src.w)
-
-
-1.4.2 RCC - Reciprocal Clamped
-
- dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
- dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
- dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
- dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
-
-
-1.4.3 DPH - Homogeneous Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
- dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
- dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
- dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
-
-
-1.5 GL_NV_fragment_program
----------------------------
-
-
-1.5.1 COS - Cosine
-
- dst.x = cos(src.x)
- dst.y = cos(src.x)
- dst.z = cos(src.x)
- dst.w = cos(src.w)
-
-
-1.5.2 DDX - Derivative Relative To X
-
- dst.x = partialx(src.x)
- dst.y = partialx(src.y)
- dst.z = partialx(src.z)
- dst.w = partialx(src.w)
-
-
-1.5.3 DDY - Derivative Relative To Y
-
- dst.x = partialy(src.x)
- dst.y = partialy(src.y)
- dst.z = partialy(src.z)
- dst.w = partialy(src.w)
-
-
-1.5.4 EX2 - Exponential Base 2
-
- Alias for EXPBASE2.
-
-
-1.5.5 FLR - Floor
-
- Alias for FLOOR.
-
-
-1.5.6 FRC - Fraction
-
- Alias for FRAC.
-
-
-1.5.7 KILP - Predicated Discard
-
- discard
-
-
-1.5.8 LG2 - Logarithm Base 2
-
- Alias for LOGBASE2.
-
-
-1.5.9 LRP - Linear Interpolate
-
- Alias for LERP.
-
-
-1.5.10 PK2H - Pack Two 16-bit Floats
-
- TBD
-
-
-1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
-
- TBD
-
-
-1.5.12 PK4B - Pack Four Signed 8-bit Scalars
-
- TBD
-
-
-1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
-
- TBD
-
-
-1.5.14 POW - Power
-
- Alias for POWER.
-
-
-1.5.15 RFL - Reflection Vector
-
- dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
- dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
- dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
- dst.w = 1.0
-
-
-1.5.16 SEQ - Set On Equal
-
- dst.x = (src0.x == src1.x) ? 1.0 : 0.0
- dst.y = (src0.y == src1.y) ? 1.0 : 0.0
- dst.z = (src0.z == src1.z) ? 1.0 : 0.0
- dst.w = (src0.w == src1.w) ? 1.0 : 0.0
-
-
-1.5.17 SFL - Set On False
-
- dst.x = 0.0
- dst.y = 0.0
- dst.z = 0.0
- dst.w = 0.0
-
-
-1.5.18 SGT - Set On Greater Than
-
- dst.x = (src0.x > src1.x) ? 1.0 : 0.0
- dst.y = (src0.y > src1.y) ? 1.0 : 0.0
- dst.z = (src0.z > src1.z) ? 1.0 : 0.0
- dst.w = (src0.w > src1.w) ? 1.0 : 0.0
-
-
-1.5.19 SIN - Sine
-
- dst.x = sin(src.x)
- dst.y = sin(src.x)
- dst.z = sin(src.x)
- dst.w = sin(src.w)
-
-
-1.5.20 SLE - Set On Less Equal Than
-
- dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
- dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
- dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
- dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
-
-
-1.5.21 SNE - Set On Not Equal
-
- dst.x = (src0.x != src1.x) ? 1.0 : 0.0
- dst.y = (src0.y != src1.y) ? 1.0 : 0.0
- dst.z = (src0.z != src1.z) ? 1.0 : 0.0
- dst.w = (src0.w != src1.w) ? 1.0 : 0.0
-
-
-1.5.22 STR - Set On True
-
- dst.x = 1.0
- dst.y = 1.0
- dst.z = 1.0
- dst.w = 1.0
-
-
-1.5.23 TEX - Texture Lookup
-
- TBD
-
-
-1.5.24 TXD - Texture Lookup with Derivatives
-
- TBD
-
-
-1.5.25 TXP - Projective Texture Lookup
-
- TBD
-
-
-1.5.26 UP2H - Unpack Two 16-Bit Floats
-
- TBD
-
-
-1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
-
- TBD
-
-
-1.5.28 UP4B - Unpack Four Signed 8-Bit Values
-
- TBD
-
-
-1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
-
- TBD
-
-
-1.5.30 X2D - 2D Coordinate Transformation
-
- dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
- dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
- dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
- dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
-
-
-1.6 GL_NV_vertex_program2
---------------------------
-
-
-1.6.1 ARA - Address Register Add
-
- TBD
-
-
-1.6.2 ARR - Address Register Load With Round
-
- dst.x = round(src.x)
- dst.y = round(src.y)
- dst.z = round(src.z)
- dst.w = round(src.w)
-
-
-1.6.3 BRA - Branch
-
- pc = target
-
-
-1.6.4 CAL - Subroutine Call
-
- push(pc)
- pc = target
-
-
-1.6.5 RET - Subroutine Call Return
-
- pc = pop()
-
-
-1.6.6 SSG - Set Sign
-
- dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
- dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
- dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
- dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
-
-
-1.7 GL_ARB_vertex_program
---------------------------
-
-
-1.7.1 SWZ - Extended Swizzle
-
- dst.x = src.x
- dst.y = src.y
- dst.z = src.z
- dst.w = src.w
-
-
-1.7.2 XPD - Cross Product
-
- Alias for CROSSPRODUCT.
-
-
-1.8 GL_ARB_fragment_program
-----------------------------
-
-
-1.8.1 CMP - Compare
-
- dst.x = (src0.x < 0.0) ? src1.x : src2.x
- dst.y = (src0.y < 0.0) ? src1.y : src2.y
- dst.z = (src0.z < 0.0) ? src1.z : src2.z
- dst.w = (src0.w < 0.0) ? src1.w : src2.w
-
-
-1.8.2 KIL - Conditional Discard
-
- if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
- discard
- endif
-
-
-1.8.3 SCS - Sine Cosine
-
- dst.x = cos(src.x)
- dst.y = sin(src.x)
- dst.z = 0.0
- dst.y = 1.0
-
-
-1.8.4 TXB - Texture Lookup With Bias
-
- TBD
-
-
-1.9 GL_NV_fragment_program2
-----------------------------
-
-
-1.9.1 NRM - 3-component Vector Normalise
-
- dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z)
- dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z)
- dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z)
- dst.w = 1.0
-
-
-1.9.2 DIV - Divide
-
- dst.x = src0.x / src1.x
- dst.y = src0.y / src1.y
- dst.z = src0.z / src1.z
- dst.w = src0.w / src1.w
-
-
-1.9.3 DP2 - 2-component Dot Product
-
- dst.x = src0.x * src1.x + src0.y * src1.y
- dst.y = src0.x * src1.x + src0.y * src1.y
- dst.z = src0.x * src1.x + src0.y * src1.y
- dst.w = src0.x * src1.x + src0.y * src1.y
-
-
-1.9.4 DP2A - 2-component Dot Product And Add
-
- Alias for DOT2ADD.
-
-
-1.9.5 TXL - Texture Lookup With LOD
-
- TBD
-
-
-1.9.6 BRK - Break
-
- TBD
-
-
-1.9.7 IF - If
-
- TBD
-
-
-1.9.10 ELSE - Else
-
- TBD
-
-
-1.9.11 ENDIF - End If
-
- TBD
-
-
-1.10 GL_NV_vertex_program3
----------------------------
-
-
-1.10.1 PUSHA - Push Address Register On Stack
-
- push(src.x)
- push(src.y)
- push(src.z)
- push(src.w)
-
-
-1.10.2 POPA - Pop Address Register From Stack
-
- dst.w = pop()
- dst.z = pop()
- dst.y = pop()
- dst.x = pop()
-
-
-1.11 GL_NV_gpu_program4
-------------------------
-
-
-1.11.1 CEIL - Ceiling
-
- dst.x = ceil(src.x)
- dst.y = ceil(src.y)
- dst.z = ceil(src.z)
- dst.w = ceil(src.w)
-
-
-1.11.2 I2F - Integer To Float
-
- dst.x = (float) src.x
- dst.y = (float) src.y
- dst.z = (float) src.z
- dst.w = (float) src.w
-
-
-1.11.3 NOT - Bitwise Not
-
- dst.x = ~src.x
- dst.y = ~src.y
- dst.z = ~src.z
- dst.w = ~src.w
-
-
-1.11.4 TRUNC - Truncate
-
- dst.x = trunc(src.x)
- dst.y = trunc(src.y)
- dst.z = trunc(src.z)
- dst.w = trunc(src.w)
-
-
-1.11.5 SHL - Shift Left
-
- dst.x = src0.x << src1.x
- dst.y = src0.y << src1.x
- dst.z = src0.z << src1.x
- dst.w = src0.w << src1.x
-
-
-1.11.6 SHR - Shift Right
-
- dst.x = src0.x >> src1.x
- dst.y = src0.y >> src1.x
- dst.z = src0.z >> src1.x
- dst.w = src0.w >> src1.x
-
-
-1.11.7 AND - Bitwise And
-
- dst.x = src0.x & src1.x
- dst.y = src0.y & src1.y
- dst.z = src0.z & src1.z
- dst.w = src0.w & src1.w
-
-
-1.11.8 OR - Bitwise Or
-
- dst.x = src0.x | src1.x
- dst.y = src0.y | src1.y
- dst.z = src0.z | src1.z
- dst.w = src0.w | src1.w
-
-
-1.11.9 MOD - Modulus
-
- dst.x = src0.x % src1.x
- dst.y = src0.y % src1.y
- dst.z = src0.z % src1.z
- dst.w = src0.w % src1.w
-
-
-1.11.10 XOR - Bitwise Xor
-
- dst.x = src0.x ^ src1.x
- dst.y = src0.y ^ src1.y
- dst.z = src0.z ^ src1.z
- dst.w = src0.w ^ src1.w
-
-
-1.11.11 SAD - Sum Of Absolute Differences
-
- dst.x = abs(src0.x - src1.x) + src2.x
- dst.y = abs(src0.y - src1.y) + src2.y
- dst.z = abs(src0.z - src1.z) + src2.z
- dst.w = abs(src0.w - src1.w) + src2.w
-
-
-1.11.12 TXF - Texel Fetch
-
- TBD
-
-
-1.11.13 TXQ - Texture Size Query
-
- TBD
-
-
-1.11.14 CONT - Continue
-
- TBD
-
-
-1.12 GL_NV_geometry_program4
------------------------------
-
-
-1.12.1 EMIT - Emit
-
- TBD
-
-
-1.12.2 ENDPRIM - End Primitive
-
- TBD
-
-
-1.13 GLSL
-----------
-
-
-1.13.1 BGNLOOP - Begin a Loop
-
- TBD
-
-
-1.13.2 BGNSUB - Begin Subroutine
-
- TBD
-
-
-1.13.3 ENDLOOP - End a Loop
-
- TBD
-
-
-1.13.4 ENDSUB - End Subroutine
-
- TBD
-
-
-1.13.5 INT - Truncate
-
- Alias for TRUNC.
-
-
-1.13.6 NOISE1 - 1D Noise
-
- TBD
-
-
-1.13.7 NOISE2 - 2D Noise
-
- TBD
-
-
-1.13.8 NOISE3 - 3D Noise
-
- TBD
-
-
-1.13.9 NOISE4 - 4D Noise
-
- TBD
-
-
-1.13.10 NOP - No Operation
-
- Do nothing.
-
-
-1.14 ps_1_1
-------------
-
-
-1.14.1 TEXKILL - Conditional Discard
-
- Alias for KIL.
-
-
-1.15 ps_1_4
-------------
-
-
-1.15.1 TEXLD - Texture Lookup
-
- Alias for TEX.
-
-
-1.16 ps_2_0
-------------
-
-
-1.16.1 M4X4 - Multiply Matrix
-
- Alias for MULTIPLYMATRIX.
-
-
-1.16.2 M4X3 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.3 M3X4 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.4 M3X3 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.5 M3X2 - Multiply Matrix
-
- Considered for removal from language.
-
-
-1.16.6 CRS - Cross Product
-
- Alias for XPD.
-
-
-1.16.7 NRM4 - 4-component Vector Normalise
-
- dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
- dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
- dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
- dst.w = src.w / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
-
-
-1.16.8 SINCOS - Sine Cosine
-
- Alias for SCS.
-
-
-1.16.9 TEXLDB - Texture Lookup With Bias
-
- Alias for TXB.
-
-
-1.16.10 DP2ADD - 2-component Dot Product And Add
-
- Alias for DP2A.
-
-
-1.17 ps_2_x
-------------
-
-
-1.17.1 CALL - Subroutine Call
-
- Alias for CAL.
-
-
-1.17.2 CALLNZ - Subroutine Call If Not Zero
-
- TBD
-
-
-1.17.3 IFC - If
-
- TBD
-
-
-1.17.4 BREAK - Break
-
- Alias for BRK.
-
-
-1.17.5 BREAKC - Break Conditional
-
- TBD
-
-
-1.17.6 DSX - Derivative Relative To X
-
- Alias for DDX.
-
-
-1.17.7 DSY - Derivative Relative To Y
-
- Alias for DDY.
-
-
-1.17.8 TEXLDD - Texture Lookup with Derivatives
-
- Alias for TXD.
-
-
-1.18 vs_1_1
-------------
-
-
-1.18.1 EXPP - Approximate Exponential Base 2
-
- Use EXP. See also 1.19.3.
-
-
-1.18.2 LOGP - Logarithm Base 2
-
- Use LOG. See also 1.19.4.
-
-
-1.19 vs_2_0
-------------
-
-
-1.19.1 SGN - Set Sign
-
- Alias for SSG.
-
-
-1.19.2 MOVA - Move Address Register
-
- Alias for ARR.
-
-
-1.19.3 EXPP - Approximate Exponential Base 2
-
- Use EX2.
-
-
-1.19.4 LOGP - Logarithm Base 2
-
- Use LG2.
-
-
-2 Explanation of symbols used
-==============================
-
-
-2.1 Functions
---------------
-
-
- abs(x) Absolute value of x.
- |x|
- (x < 0.0) ? -x : x
-
- ceil(x) Ceiling of x.
-
- clamp(x,y,z) Clamp x between y and z.
- (x < y) ? y : (x > z) ? z : x
-
- cos(x) Cosine of x.
-
- floor(x) Floor of x.
-
- lg2(x) Logarithm base 2 of x.
-
- max(x,y) Maximum of x and y.
- (x > y) ? x : y
-
- min(x,y) Minimum of x and y.
- (x < y) ? x : y
-
- partialx(x) Derivative of x relative to fragment's X.
-
- partialy(x) Derivative of x relative to fragment's Y.
-
- pop() Pop from stack.
-
- pow(x,y) Raise x to power of y.
-
- push(x) Push x on stack.
-
- round(x) Round x.
-
- sin(x) Sine of x.
-
- sqrt(x) Square root of x.
-
- trunc(x) Truncate x.
-
-
-2.2 Keywords
--------------
-
-
- discard Discard fragment.
-
- dst First destination register.
-
- dst0 First destination register.
-
- pc Program counter.
-
- src First source register.
-
- src0 First source register.
-
- src1 Second source register.
-
- src2 Third source register.
-
- target Label of target instruction.
-
-
-3 Other tokens
-===============
-
-
-3.1 Declaration Semantic
--------------------------
-
-
- Follows Declaration token if Semantic bit is set.
-
- Since its purpose is to link a shader with other stages of the pipeline,
- it is valid to follow only those Declaration tokens that declare a register
- either in INPUT or OUTPUT file.
-
- SemanticName field contains the semantic name of the register being declared.
- There is no default value.
-
- SemanticIndex is an optional subscript that can be used to distinguish
- different register declarations with the same semantic name. The default value
- is 0.
-
- The meanings of the individual semantic names are explained in the following
- sections.
-
-
-3.1.1 FACE
-
- Valid only in a fragment shader INPUT declaration.
-
- FACE.x is negative when the primitive is back facing. FACE.x is positive
- when the primitive is front facing.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index e0c5d3d3d6..ced9c94f46 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -36,6 +36,7 @@
#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
@@ -84,25 +85,28 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
{
const struct tgsi_full_instruction *fullinst
= &parse.FullToken.FullInstruction;
+ uint i;
assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
info->opcode_count[fullinst->Instruction.Opcode]++;
- /* check if we read the frag shader FOG or FACE inputs */
- if (procType == TGSI_PROCESSOR_FRAGMENT) {
- uint i;
- for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *src =
- &fullinst->Src[i];
- if (src->Register.File == TGSI_FILE_INPUT ||
- src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
- const int ind = src->Register.Index;
- if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
- info->uses_fogcoord = TRUE;
- }
- else if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FACE) {
- info->uses_frontfacing = TRUE;
+ for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *src =
+ &fullinst->Src[i];
+ int ind = src->Register.Index;
+
+ /* Mark which inputs are effectively used */
+ if (src->Register.File == TGSI_FILE_INPUT) {
+ unsigned usage_mask;
+ usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
+ if (src->Register.Indirect) {
+ for (ind = 0; ind < info->num_inputs; ++ind) {
+ info->input_usage_mask[ind] |= usage_mask;
}
+ } else {
+ assert(ind >= 0);
+ assert(ind < PIPE_MAX_SHADER_INPUTS);
+ info->input_usage_mask[ind] |= usage_mask;
}
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 27de33f799..e75280336f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -45,6 +45,7 @@ struct tgsi_shader_info
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
+ ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
@@ -61,8 +62,6 @@ struct tgsi_shader_info
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
- boolean uses_fogcoord; /**< fragment shader uses fog coord? */
- boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
struct {
unsigned name;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d5061f8b51..785a9fb035 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1244,16 +1244,9 @@ emit_sub(
make_xmm( xmm_src ) );
}
-
-
-
-
-
-
/**
* Register fetch.
*/
-
static void
emit_fetch(
struct x86_function *func,
@@ -1338,7 +1331,6 @@ emit_fetch(
/**
* Register store.
*/
-
static void
emit_store(
struct x86_function *func,
@@ -1455,7 +1447,6 @@ fetch_texel( struct tgsi_sampler **sampler,
/**
* High-level instruction translators.
*/
-
static void
emit_tex( struct x86_function *func,
const struct tgsi_full_instruction *inst,
@@ -1507,7 +1498,6 @@ emit_tex( struct x86_function *func,
get_temp( TEMP_R0, 3 ),
make_xmm( 3 ) );
-
if (projected) {
FETCH( func, *inst, 3, 0, 3 );
@@ -1535,7 +1525,6 @@ emit_tex( struct x86_function *func,
args[0] = get_temp( TEMP_R0, 0 );
args[1] = get_sampler_ptr( unit );
-
emit_func_call( func,
0,
args,
@@ -1569,7 +1558,8 @@ emit_kil(
/* This mask stores component bits that were already tested. Note that
* we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
+ * tested.
+ */
uniquemask = 0;
FOR_EACH_CHANNEL( chan_index ) {
@@ -1715,22 +1705,26 @@ emit_cmp(
/**
- * Check if inst src/dest regs use indirect addressing into temporary
- * register file.
+ * Check if inst src/dest regs use indirect addressing into temporary,
+ * input or output register files.
*/
static boolean
-indirect_temp_reference(const struct tgsi_full_instruction *inst)
+indirect_reg_reference(const struct tgsi_full_instruction *inst)
{
uint i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *reg = &inst->Src[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ if ((reg->Register.File == TGSI_FILE_TEMPORARY ||
+ reg->Register.File == TGSI_FILE_INPUT ||
+ reg->Register.File == TGSI_FILE_OUTPUT) &&
reg->Register.Indirect)
return TRUE;
}
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
const struct tgsi_full_dst_register *reg = &inst->Dst[i];
- if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ if ((reg->Register.File == TGSI_FILE_TEMPORARY ||
+ reg->Register.File == TGSI_FILE_INPUT ||
+ reg->Register.File == TGSI_FILE_OUTPUT) &&
reg->Register.Indirect)
return TRUE;
}
@@ -1746,7 +1740,7 @@ emit_instruction(
unsigned chan_index;
/* we can't handle indirect addressing into temp register file yet */
- if (indirect_temp_reference(inst))
+ if (indirect_reg_reference(inst))
return FALSE;
switch (inst->Instruction.Opcode) {
@@ -2931,7 +2925,6 @@ tgsi_emit_sse2(
x86_make_disp( get_machine_base(),
Offset( struct tgsi_exec_machine, Samplers ) ) );
-
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
tgsi_parse_token( &parse );
@@ -3015,4 +3008,3 @@ tgsi_emit_sse2(
}
#endif /* PIPE_ARCH_X86 */
-
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 0a7e4105a8..3ec5496416 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -163,3 +163,149 @@ tgsi_util_set_full_src_register_sign_mode(
assert( 0 );
}
}
+
+/**
+ * Determine which channels of the specificed src register are effectively
+ * used by this instruction.
+ */
+unsigned
+tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
+ unsigned src_idx)
+{
+ const struct tgsi_full_src_register *src = &inst->Src[src_idx];
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ unsigned read_mask;
+ unsigned usage_mask;
+ unsigned chan;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_ARL:
+ case TGSI_OPCODE_ARR:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_DIV:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_MIN:
+ case TGSI_OPCODE_MAX:
+ case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_MAD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_LRP:
+ case TGSI_OPCODE_CND:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_CEIL:
+ case TGSI_OPCODE_CLAMP:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_ROUND:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_DDX:
+ case TGSI_OPCODE_DDY:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SSG:
+ case TGSI_OPCODE_CMP:
+ case TGSI_OPCODE_TRUNC:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_AND:
+ case TGSI_OPCODE_OR:
+ case TGSI_OPCODE_XOR:
+ case TGSI_OPCODE_SAD:
+ /* Channel-wise operations */
+ read_mask = write_mask;
+ break;
+
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_RCC:
+ read_mask = TGSI_WRITEMASK_X;
+ break;
+
+ case TGSI_OPCODE_SCS:
+ read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
+ break;
+
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
+ read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
+ break;
+
+ case TGSI_OPCODE_DP2A:
+ read_mask = src_idx == 2 ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_XY;
+ break;
+
+ case TGSI_OPCODE_DP2:
+ read_mask = TGSI_WRITEMASK_XY;
+ break;
+
+ case TGSI_OPCODE_DP3:
+ read_mask = TGSI_WRITEMASK_XYZ;
+ break;
+
+ case TGSI_OPCODE_DP4:
+ read_mask = TGSI_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_DPH:
+ read_mask = src_idx == 0 ? TGSI_WRITEMASK_XYZ : TGSI_WRITEMASK_XYZW;
+ break;
+
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXD:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXL:
+ case TGSI_OPCODE_TXP:
+ if (src_idx == 0) {
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ read_mask = TGSI_WRITEMASK_X;
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ read_mask = TGSI_WRITEMASK_XY;
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ read_mask = TGSI_WRITEMASK_XYZ;
+ break;
+
+ default:
+ assert(0);
+ read_mask = 0;
+ }
+
+ if (inst->Instruction.Opcode != TGSI_OPCODE_TEX) {
+ read_mask |= TGSI_WRITEMASK_W;
+ }
+ } else {
+ /* A safe approximation */
+ read_mask = TGSI_WRITEMASK_XYZW;
+ }
+ break;
+
+ default:
+ /* Assume all channels are read */
+ read_mask = TGSI_WRITEMASK_XYZW;
+ break;
+ }
+
+ usage_mask = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ if (read_mask & (1 << chan)) {
+ usage_mask |= 1 << tgsi_util_get_full_src_register_swizzle(src, chan);
+ }
+ }
+
+ return usage_mask;
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 19ee2e7cf2..04702ba982 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -34,6 +34,7 @@ extern "C" {
struct tgsi_src_register;
struct tgsi_full_src_register;
+struct tgsi_full_instruction;
void *
tgsi_align_128bit(
@@ -71,6 +72,10 @@ tgsi_util_set_full_src_register_sign_mode(
struct tgsi_full_src_register *reg,
unsigned sign_mode );
+unsigned
+tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
+ unsigned src_idx);
+
#if defined __cplusplus
}
#endif