1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
|
/*
* Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Authors:
* Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
*/
#ifndef _R700_ASSEMBLER_H_
#define _R700_ASSEMBLER_H_
#include "main/mtypes.h"
#include "shader/prog_instruction.h"
#include "r700_chip.h"
#include "r700_shaderinst.h"
#include "r700_shader.h"
/**
 * Kind of program the assembler is working on.
 * Held in r700_AssemblerBase.currentShaderType and passed to
 * Init_r700_AssemblerBase().
 */
typedef enum SHADER_PIPE_TYPE {
    SPT_VP = 0, ///< presumably "vertex program" (cf. the ucVP_* maps)
    SPT_FP = 1  ///< presumably "fragment program" (cf. the uiFP_* maps)
} SHADER_PIPE_TYPE;
/**
 * Array dimensions used by r700_AssemblerBase:
 * hw_gpr is [NUMBER_OF_CYCLES][NUMBER_OF_COMPONENTS], and
 * hw_cfile_addr / hw_cfile_chan are [NUMBER_OF_COMPONENTS].
 */
typedef enum ConstantCycles {
    NUMBER_OF_CYCLES     = 3,
    NUMBER_OF_COMPONENTS = 4
} ConstantCycles;
/**
 * Register-file limits and fixed table sizes used by the assembler.
 * The first four enumerators alias SQ_ALU_SRC_* constants that are defined
 * outside this header (presumably in r700_chip.h -- confirm).
 */
typedef enum HARDWARE_LIMIT_VALUES {
    TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
    MAX_TEMPORARY_REGISTERS   = SQ_ALU_SRC_GPR_SIZE,
    MAX_CONSTANT_REGISTERS    = SQ_ALU_SRC_CFILE_SIZE,
    CFILE_REGISTER_OFFSET     = SQ_ALU_SRC_CFILE_BASE,
    NUMBER_OF_INPUT_COLORS    = 2,  ///< length of r700_AssemblerBase.input_color_is_used
    NUMBER_OF_OUTPUT_COLORS   = 8,  ///< length of r700_AssemblerBase.color_export_register_number
    NUMBER_OF_TEXTURE_UNITS   = 16, ///< length of r700_AssemblerBase.input_texture_unit_is_used
    MEGA_FETCH_BYTES          = 32
} HARDWARE_LIMIT_VALUES;
/**
 * Addressing modes for register operands.
 * NUMBER_OF_ADDR_MOD is the count of real modes, not a mode itself.
 */
typedef enum AddressMode {
    ADDR_ABSOLUTE       = 0,
    ADDR_RELATIVE_A0    = 1, ///< presumably relative to the A0 address register (cf. DST_REG_A0)
    ADDR_RELATIVE_FLI_0 = 2,
    NUMBER_OF_ADDR_MOD  = 3
} AddressMode;
/**
 * Register classes a source operand can come from.
 * NUMBER_OF_SRC_REG_TYPE is the count of real entries.
 */
typedef enum SrcRegisterType {
    SRC_REG_TEMPORARY      = 0,
    SRC_REG_INPUT          = 1,
    SRC_REG_CONSTANT       = 2,
    SRC_REG_ALT_TEMPORARY  = 3,
    NUMBER_OF_SRC_REG_TYPE = 4
} SrcRegisterType;
/**
 * Register classes a destination operand can target.
 * NUMBER_OF_DST_REG_TYPE is the count of real entries.
 */
typedef enum DstRegisterType {
    DST_REG_TEMPORARY      = 0,
    DST_REG_A0             = 1,
    DST_REG_OUT            = 2,
    DST_REG_OUT_X_REPL     = 3,
    DST_REG_ALT_TEMPORARY  = 4,
    DST_REG_INPUT          = 5,
    NUMBER_OF_DST_REG_TYPE = 6
} DstRegisterType;
/** Base type for every bit-field in this header. */
typedef unsigned int BITS;

/**
 * Packed description of an instruction's destination operand.
 * The field widths sum to 32. Trailing numbers give the running bit
 * total after the field; actual bit placement is compiler-defined.
 */
typedef struct PVSDSTtag {
    BITS opcode:8;     //  8  @@@ really should be 10 bits for OP2
    BITS math:1;       //  9
    BITS predicated:1; // 10
    BITS pred_inv:1;   // 11
    BITS rtype:3;      // 14  presumably a DstRegisterType value -- confirm
    BITS reg:10;       // 24
    BITS writex:1;     // 25
    BITS writey:1;     // 26
    BITS writez:1;     // 27
    BITS writew:1;     // 28
    BITS op3:1;        // 29  Represents *_OP3_* ALU opcode
    BITS dualop:1;     // 30
    BITS addrmode0:1;  // 31
    BITS addrmode1:1;  // 32
} PVSDST;
/**
 * Packed description of one source operand.
 * The field widths sum to 32. Trailing numbers give the running bit
 * total after the field.
 */
typedef struct PVSSRCtag {
    BITS rtype:4;      //  4  presumably a SrcRegisterType value -- confirm
    BITS addrmode0:1;  //  5
    BITS reg:10;       // 15 (8)
    BITS swizzlex:3;   // 18
    BITS swizzley:3;   // 21
    BITS swizzlez:3;   // 24
    BITS swizzlew:3;   // 27
    BITS negx:1;       // 28
    BITS negy:1;       // 29
    BITS negz:1;       // 30
    BITS negw:1;       // 31
    //BITS addrsel:2;  (disabled in the original declaration)
    BITS addrmode1:1;  // 32
} PVSSRC;
/**
 * Packed operand word in its "math" layout (one of the PVSDWORD views).
 * The field widths sum to 32.
 */
typedef struct PVSMATHtag {
    BITS rtype:4;
    BITS spare:1;
    BITS reg:8;
    BITS swizzlex:3;
    BITS swizzley:3;
    BITS dstoff:2;   ///< 2 bits of dest offset into alt ram
    BITS opcode:4;
    BITS negx:1;
    BITS negy:1;
    BITS dstcomp:2;  ///< select dest component
    BITS spare2:3;
} PVSMATH;
/**
 * One operand word seen through several overlapping views: the raw
 * value, one of the three bit-field layouts, or a float.
 */
typedef union PVSDWORDtag {
    BITS    bits; ///< raw unsigned value
    PVSDST  dst;  ///< destination-operand layout
    PVSSRC  src;  ///< source-operand layout
    PVSMATH math; ///< math layout
    float   f;
} PVSDWORD;
/**
 * One-bit flags for vertex outputs, followed by reserved padding.
 * 20 flags + 12 reserved bits = 32. The trailing numbers are each
 * field's index in declaration order.
 */
typedef struct VAP_OUT_VTX_FMT_0tag {
    BITS pos:1;            //  0
    BITS misc:1;           //  1
    BITS clip_dist0:1;     //  2
    BITS clip_dist1:1;     //  3
    BITS pos_param:1;      //  4
    BITS color0:1;         //  5
    BITS color1:1;         //  6
    BITS color2:1;         //  7
    BITS color3:1;         //  8
    BITS color4:1;         //  9
    BITS color5:1;         // 10
    BITS color6:1;         // 11
    BITS color7:1;         // 12
    BITS normal:1;         // 13
    BITS depth:1;          // 14
    BITS point_size:1;     // 15
    BITS edge_flag:1;      // 16
    BITS rta_index:1;      // 17  shares same channel as kill_flag
    BITS kill_flag:1;      // 18
    BITS viewport_index:1; // 19
    BITS resvd1:12;        // 20..31
} VAP_OUT_VTX_FMT_0;
/**
 * Three-bit "comp" field for each of texture outputs 0..7, followed by
 * 8 reserved bits (8 * 3 + 8 = 32).
 */
typedef struct VAP_OUT_VTX_FMT_1tag {
    BITS tex0comp:3;
    BITS tex1comp:3;
    BITS tex2comp:3;
    BITS tex3comp:3;
    BITS tex4comp:3;
    BITS tex5comp:3;
    BITS tex6comp:3;
    BITS tex7comp:3;
    BITS resvd:8;
} VAP_OUT_VTX_FMT_1;
/**
 * Three-bit "comp" field for each of texture outputs 8..15, followed by
 * 8 reserved bits (8 * 3 + 8 = 32). Continues VAP_OUT_VTX_FMT_1.
 */
typedef struct VAP_OUT_VTX_FMT_2tag {
    BITS tex8comp:3;
    BITS tex9comp:3;
    BITS tex10comp:3;
    BITS tex11comp:3;
    BITS tex12comp:3;
    BITS tex13comp:3;
    BITS tex14comp:3;
    BITS tex15comp:3;
    BITS resvd:8;
} VAP_OUT_VTX_FMT_2;
/**
 * One-bit flags for fragment outputs, followed by reserved padding
 * (12 flags + 20 reserved bits = 32).
 * Stored in r700_AssemblerBase.fp_stOutFmt0.
 */
typedef struct OUT_FRAGMENT_FMT_0tag {
    BITS color0:1;
    BITS color1:1;
    BITS color2:1;
    BITS color3:1;
    BITS color4:1;
    BITS color5:1;
    BITS color6:1;
    BITS color7:1;
    BITS depth:1;
    BITS stencil_ref:1;
    BITS coverage_to_mask:1;
    BITS mask:1;
    BITS resvd1:20;
} OUT_FRAGMENT_FMT_0;
/**
 * Kinds of control-flow clause tracked by the assembler
 * (see r700_AssemblerBase.cf_current_clause_type).
 * Values are spelled out explicitly; they match the implicit numbering
 * of the original declaration.
 */
typedef enum CF_CLAUSE_TYPE {
    CF_EXPORT_CLAUSE      = 0,
    CF_ALU_CLAUSE         = 1,
    CF_TEX_CLAUSE         = 2,
    CF_VTX_CLAUSE         = 3,
    CF_OTHER_CLAUSE       = 4,
    CF_EMPTY_CLAUSE       = 5,
    NUMBER_CF_CLAUSE_TYPES = 6 ///< count of real clause types
} CF_CLAUSE_TYPE;
/** Constant-file capacities. */
enum {
    MAX_BOOL_CONSTANTS  = 32,
    MAX_INT_CONSTANTS   = 32,
    MAX_FLOAT_CONSTANTS = 256
};

/** Flow-control level kinds (presumably the values of FC_LEVEL.type -- confirm). */
enum {
    FC_NONE = 0,
    FC_IF   = 1,
    FC_LOOP = 2,
    FC_REP  = 3
};

/** Condition sources (presumably the values of FC_LEVEL.cond -- confirm). */
enum {
    COND_NONE = 0,
    COND_BOOL = 1,
    COND_PRED = 2,
    COND_ALU  = 3
};

/** Instruction distances considered safe between dependent operations. */
enum {
    SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
    SAFEDIST_ALU = 6  ///< the same for alu->fc
};
/**
 * Bookkeeping for one nesting level of flow control.
 * r700_AssemblerBase keeps an array of these in fc_stack[].
 */
typedef struct FC_LEVEL {
    unsigned int  first;  ///< first fc instruction on level (if, rep, loop)
    unsigned int *mid;    ///< middle instructions - else or all breaks on this level
    unsigned int  midLen; ///< presumably the number of entries in mid -- confirm
    unsigned int  type;
    unsigned int  cond;
    unsigned int  inv;
    unsigned int  bpush;  ///< 1 if first instruction does branch stack push
    int           id;     ///< id of bool or int variable
} FC_LEVEL;
/**
 * Vertex-fetch options handed to assemble_vfetch_instruction() and
 * assemble_vfetch_instruction2().
 */
typedef struct VTX_FETCH_METHOD {
    GLboolean bEnableMini;
    GLuint    mega_fetch_remainder;
} VTX_FETCH_METHOD;
/**
 * Complete working state of the assembler for one shader.
 * A pointer to this structure is the first (or only) argument of nearly
 * every function declared in this header.
 */
typedef struct r700_AssemblerBase {
    /* Clause cursors, one per clause kind. */
    R700ControlFlowSXClause      *cf_last_export_ptr;
    R700ControlFlowSXClause      *cf_current_export_clause_ptr;
    R700ControlFlowALUClause     *cf_current_alu_clause_ptr;
    R700ControlFlowGenericClause *cf_current_tex_clause_ptr;
    R700ControlFlowGenericClause *cf_current_vtx_clause_ptr;
    R700ControlFlowGenericClause *cf_current_cf_clause_ptr;

    /* Result shader */
    R700_Shader *pR700Shader;

    /* Original note here: "No clause has been created yet" -- presumably
     * describes the initial value of this field; confirm in the
     * initialisation code. */
    CF_CLAUSE_TYPE cf_current_clause_type;

    GLuint number_of_exports;
    GLuint number_of_colorandz_exports;
    GLuint number_of_export_opcodes;

    /* Operand words: D is a single word, S holds three. */
    PVSDWORD D;
    PVSDWORD S[3];

    unsigned int uLastPosUpdate;

    OUT_FRAGMENT_FMT_0 fp_stOutFmt0;

    unsigned int uIIns;
    unsigned int uOIns;
    unsigned int number_used_registers;
    unsigned int uUsedConsts;

    /* Fragment programs */
    unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
    unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
    unsigned int uBoolConsts;
    unsigned int uIntConsts;
    unsigned int uInsts;
    unsigned int uConsts;

    /* Vertex programs */
    unsigned char  ucVP_AttributeMap[VERT_ATTRIB_MAX];
    unsigned char  ucVP_OutputMap[VERT_RESULT_MAX];
    unsigned char *pucOutMask;

    /* ---------------------------------------------------------------
     * flow control members
     * --------------------------------------------------------------- */
    unsigned int FCSP;
    FC_LEVEL     fc_stack[32];
    unsigned int branch_depth;
    unsigned int max_branch_depth;

    /* ---------------------------------------------------------------
     * ArgSubst used in Assemble_Source() function
     * --------------------------------------------------------------- */
    int aArgSubst[4];

    GLint hw_gpr[NUMBER_OF_CYCLES][NUMBER_OF_COMPONENTS];
    GLint hw_cfile_addr[NUMBER_OF_COMPONENTS];
    GLint hw_cfile_chan[NUMBER_OF_COMPONENTS];

    GLuint uOutputs;

    /* Export register numbers. */
    GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
    GLint depth_export_register_number;
    GLint stencil_export_register_number;
    GLint coverage_to_mask_export_register_number;
    GLint mask_export_register_number;

    GLuint starting_export_register_number;
    GLuint starting_vfetch_register_number;
    GLuint starting_temp_register_number;
    GLuint uHelpReg;
    GLuint uFirstHelpReg;

    /* Which inputs are in use. */
    GLboolean input_position_is_used;
    GLboolean input_normal_is_used;
    GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
    GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];

    R700VertexGenericFetch *vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];

    GLuint number_of_inputs;

    InstDeps *pInstDeps;

    SHADER_PIPE_TYPE currentShaderType;
    struct prog_instruction *pILInst;
    GLuint    uiCurInst;
    GLboolean bR6xx;

    /* helper to decide which type of instruction to assemble */
    GLboolean is_tex;
    /* we inserted helper instructions and need barrier on next TEX ins */
    GLboolean need_tex_barrier;
} r700_AssemblerBase;
//Internal use
/*
 * Helpers operating on the packed PVSDST / PVSSRC operand words.
 * Only the signatures are visible in this header; what each helper does
 * to the bit-fields must be read from its definition.
 */

/* Destination operand (PVSDST). */
BITS addrmode_PVSDST(PVSDST *pPVSDST);
void setaddrmode_PVSDST(PVSDST *pPVSDST, BITS addrmode);
void nomask_PVSDST(PVSDST *pPVSDST);

/* Source operand (PVSSRC): address mode. */
BITS addrmode_PVSSRC(PVSSRC *pPVSSRC);
void setaddrmode_PVSSRC(PVSSRC *pPVSSRC, BITS addrmode);

/* Source operand (PVSSRC): swizzle. */
void setswizzle_PVSSRC(PVSSRC *pPVSSRC, BITS swz);
void noswizzle_PVSSRC(PVSSRC *pPVSSRC);
void swizzleagain_PVSSRC(PVSSRC *pPVSSRC, BITS x, BITS y, BITS z, BITS w);

/* Source operand (PVSSRC): negate flags. */
void neg_PVSSRC(PVSSRC *pPVSSRC);
void noneg_PVSSRC(PVSSRC *pPVSSRC);
void flipneg_PVSSRC(PVSSRC *pPVSSRC);

/* Source operand (PVSSRC): per-component helpers; c selects the component. */
void zerocomp_PVSSRC(PVSSRC *pPVSSRC, int c);
void onecomp_PVSSRC(PVSSRC *pPVSSRC, int c);
/*
 * Query helpers. Declarations only; their exact results are defined by
 * the implementation, which is not visible here.
 */

/* Queries on the output-format words. */
BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0 *pOutVTXFmt0);
BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0 *pFPOutFmt);

/* Queries on opcodes and operands. */
GLboolean is_reduction_opcode(PVSDWORD *dest);
unsigned int r700GetNumOperands(r700_AssemblerBase *pAsm);
GLboolean IsTex(gl_inst_opcode Opcode);
GLboolean IsAlu(gl_inst_opcode Opcode);

/* Surface-format lookup; pClient_size is an out-parameter (presumably -- confirm). */
GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint *pClient_size);
/*
 * Clause management and vertex/texture fetch emission.
 * Declarations only. The GLboolean results are presumably GL_TRUE on
 * success -- confirm against the definitions.
 */
int check_current_clause(r700_AssemblerBase *pAsm, CF_CLAUSE_TYPE new_clause_type);

GLboolean add_vfetch_instruction(r700_AssemblerBase *pAsm,
                                 R700VertexInstruction *vertex_instruction_ptr);
GLboolean add_tex_instruction(r700_AssemblerBase *pAsm,
                              R700TextureInstruction *tex_instruction_ptr);

GLboolean assemble_vfetch_instruction(
    r700_AssemblerBase *pAsm,
    GLuint              gl_client_id,
    GLuint              destination_register,
    GLuint              number_of_elements,
    GLenum              dataElementType,
    VTX_FETCH_METHOD   *pFetchMethod);

/* Variant taking an explicit type/size/format description of the element. */
GLboolean assemble_vfetch_instruction2(
    r700_AssemblerBase *pAsm,
    GLuint              destination_register,
    GLenum              type,
    GLint               size,
    GLubyte             element,
    GLuint              _signed,
    GLboolean           normalize,
    GLenum              format,
    VTX_FETCH_METHOD   *pFetchMethod);

GLboolean cleanup_vfetch_instructions(r700_AssemblerBase *pAsm);
/*
 * Helper-register handling, operand checks and operand assembly.
 * Declarations only; behaviour is defined by the implementation.
 */

/* Helper registers (cf. r700_AssemblerBase.uHelpReg / uFirstHelpReg). */
GLuint gethelpr(r700_AssemblerBase *pAsm);
void resethelpr(r700_AssemblerBase *pAsm);

/* Operand checks for one-, two- and three-operand instructions (presumably). */
void checkop_init(r700_AssemblerBase *pAsm);
GLboolean mov_temp(r700_AssemblerBase *pAsm, int src);
GLboolean checkop1(r700_AssemblerBase *pAsm);
GLboolean checkop2(r700_AssemblerBase *pAsm);
GLboolean checkop3(r700_AssemblerBase *pAsm);

/* Source / destination operand assembly. */
GLboolean assemble_src(r700_AssemblerBase *pAsm, int src, int fld);
GLboolean assemble_dst(r700_AssemblerBase *pAsm);

/* Texture instruction operands and emission. */
GLboolean tex_dst(r700_AssemblerBase *pAsm);
GLboolean tex_src(r700_AssemblerBase *pAsm);
GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);

void initialize(r700_AssemblerBase *pAsm);
/*
 * ALU instruction assembly and register read reservation.
 * Declarations only; behaviour is defined by the implementation.
 */
GLboolean assemble_alu_src(R700ALUInstruction *alu_instruction_ptr,
                           int                 source_index,
                           PVSSRC             *pSource,
                           BITS                scalar_channel_index);

GLboolean add_alu_instruction(r700_AssemblerBase *pAsm,
                              R700ALUInstruction *alu_instruction_ptr,
                              GLuint              contiguous_slots_needed);

/* The four psrc_* pointers are presumably out-parameters -- confirm. */
void get_src_properties(R700ALUInstruction *alu_instruction_ptr,
                        int                 source_index,
                        BITS               *psrc_sel,
                        BITS               *psrc_rel,
                        BITS               *psrc_chan,
                        BITS               *psrc_neg);

/* Classification of a source select value. */
int is_cfile(BITS sel);
int is_const(BITS sel);
int is_gpr(BITS sel);

/* Reservation helpers (cf. r700_AssemblerBase.hw_cfile_* and hw_gpr). */
GLboolean reserve_cfile(r700_AssemblerBase *pAsm, GLuint sel, GLuint chan);
GLboolean reserve_gpr(r700_AssemblerBase *pAsm, GLuint sel, GLuint chan, GLuint cycle);

/* Bank-swizzle to cycle mapping; pCycle is presumably an out-parameter. */
GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint *pCycle);
GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint *pCycle);

GLboolean check_scalar(r700_AssemblerBase *pAsm,
                       R700ALUInstruction *alu_instruction_ptr);
GLboolean check_vector(r700_AssemblerBase *pAsm,
                       R700ALUInstruction *alu_instruction_ptr);

GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
GLboolean next_ins(r700_AssemblerBase *pAsm);
GLboolean assemble_math_function(r700_AssemblerBase *pAsm, BITS opcode);
/*
 * Per-opcode assemblers, listed alphabetically by mnemonic.
 * Each one takes the assembler state and returns a GLboolean
 * (presumably GL_TRUE on success -- confirm against the definitions).
 */
GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
GLboolean assemble_COS(r700_AssemblerBase *pAsm);
GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
GLboolean assemble_DST(r700_AssemblerBase *pAsm);
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
GLboolean assemble_IF(r700_AssemblerBase *pAsm);
GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
GLboolean assemble_POW(r700_AssemblerBase *pAsm);
GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
GLboolean assemble_STP(r700_AssemblerBase *pAsm);
GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
GLboolean assemble_XPD(r700_AssemblerBase *pAsm);

/*
 * Odd one out: takes an opcode name string instead of the assembler
 * state (presumably reports an unsupported opcode -- confirm).
 */
GLboolean assemble_BAD(char *opcode_str);
/*
 * Export helpers. Declarations only; behaviour is defined by the
 * implementation.
 */
GLboolean Process_Export(
    r700_AssemblerBase *pAsm,
    GLuint              type,
    GLuint              export_starting_index,
    GLuint              export_count,
    GLuint              starting_register_number,
    GLboolean           is_depth_export);

GLboolean Move_Depth_Exports_To_Correct_Channels(
    r700_AssemblerBase *pAsm,
    BITS                depth_channel_select);
//Interface
/*
 * Entry points of the assembler. The call order implied by the names
 * (initialise, assemble, process exports, clean up) is an assumption;
 * confirm against the callers.
 */
int Init_r700_AssemblerBase(SHADER_PIPE_TYPE    spt,
                            r700_AssemblerBase *pAsm,
                            R700_Shader        *pShader);

GLboolean AssembleInstr(GLuint                   uiNumberInsts,
                        struct prog_instruction *pILInst,
                        r700_AssemblerBase      *pR700AsmCode);

GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
                                   GLbitfield          OutputsWritten);
GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
                                 GLbitfield          OutputsWritten);

GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
#endif //_R700_ASSEMBLER_H_
|