summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r600/r600_compiler.c
blob: f1be2bbdf4f38a8bef1119e3157d57fd63aff884 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <errno.h>
#include "r600_compiler.h"

struct c_vector *c_vector_new(void)
{
	struct c_vector *v = calloc(1, sizeof(struct c_vector));

	if (v == NULL) {
		return NULL;
	}
	c_list_init(v);
	return v;
}

static unsigned c_opcode_is_alu(unsigned opcode)
{
	switch (opcode) {
	case C_OPCODE_MOV:
	case C_OPCODE_MUL:
	case C_OPCODE_MAD:
	case C_OPCODE_ARL:
	case C_OPCODE_LIT:
	case C_OPCODE_RCP:
	case C_OPCODE_RSQ:
	case C_OPCODE_EXP:
	case C_OPCODE_LOG:
	case C_OPCODE_ADD:
	case C_OPCODE_DP3:
	case C_OPCODE_DP4:
	case C_OPCODE_DST:
	case C_OPCODE_MIN:
	case C_OPCODE_MAX:
	case C_OPCODE_SLT:
	case C_OPCODE_SGE:
	case C_OPCODE_SUB:
	case C_OPCODE_LRP:
	case C_OPCODE_CND:
	case C_OPCODE_DP2A:
	case C_OPCODE_FRC:
	case C_OPCODE_CLAMP:
	case C_OPCODE_FLR:
	case C_OPCODE_ROUND:
	case C_OPCODE_EX2:
	case C_OPCODE_LG2:
	case C_OPCODE_POW:
	case C_OPCODE_XPD:
	case C_OPCODE_ABS:
	case C_OPCODE_RCC:
	case C_OPCODE_DPH:
	case C_OPCODE_COS:
	case C_OPCODE_DDX:
	case C_OPCODE_DDY:
	case C_OPCODE_PK2H:
	case C_OPCODE_PK2US:
	case C_OPCODE_PK4B:
	case C_OPCODE_PK4UB:
	case C_OPCODE_RFL:
	case C_OPCODE_SEQ:
	case C_OPCODE_SFL:
	case C_OPCODE_SGT:
	case C_OPCODE_SIN:
	case C_OPCODE_SLE:
	case C_OPCODE_SNE:
	case C_OPCODE_STR:
	case C_OPCODE_UP2H:
	case C_OPCODE_UP2US:
	case C_OPCODE_UP4B:
	case C_OPCODE_UP4UB:
	case C_OPCODE_X2D:
	case C_OPCODE_ARA:
	case C_OPCODE_ARR:
	case C_OPCODE_BRA:
	case C_OPCODE_SSG:
	case C_OPCODE_CMP:
	case C_OPCODE_SCS:
	case C_OPCODE_NRM:
	case C_OPCODE_DIV:
	case C_OPCODE_DP2:
	case C_OPCODE_CEIL:
	case C_OPCODE_I2F:
	case C_OPCODE_NOT:
	case C_OPCODE_TRUNC:
	case C_OPCODE_SHL:
	case C_OPCODE_AND:
	case C_OPCODE_OR:
	case C_OPCODE_MOD:
	case C_OPCODE_XOR:
	case C_OPCODE_SAD:
	case C_OPCODE_NRM4:
	case C_OPCODE_F2I:
	case C_OPCODE_IDIV:
	case C_OPCODE_IMAX:
	case C_OPCODE_IMIN:
	case C_OPCODE_INEG:
	case C_OPCODE_ISGE:
	case C_OPCODE_ISHR:
	case C_OPCODE_ISLT:
	case C_OPCODE_F2U:
	case C_OPCODE_U2F:
	case C_OPCODE_UADD:
	case C_OPCODE_UDIV:
	case C_OPCODE_UMAD:
	case C_OPCODE_UMAX:
	case C_OPCODE_UMIN:
	case C_OPCODE_UMOD:
	case C_OPCODE_UMUL:
	case C_OPCODE_USEQ:
	case C_OPCODE_USGE:
	case C_OPCODE_USHR:
	case C_OPCODE_USLT:
	case C_OPCODE_USNE:
		return 1;
	case C_OPCODE_END:
	case C_OPCODE_VFETCH:
	case C_OPCODE_KILP:
	case C_OPCODE_CAL:
	case C_OPCODE_RET:
	case C_OPCODE_TXB:
	case C_OPCODE_TXL:
	case C_OPCODE_BRK:
	case C_OPCODE_IF:
	case C_OPCODE_BGNFOR:
	case C_OPCODE_REP:
	case C_OPCODE_ELSE:
	case C_OPCODE_ENDIF:
	case C_OPCODE_ENDFOR:
	case C_OPCODE_ENDREP:
	case C_OPCODE_PUSHA:
	case C_OPCODE_POPA:
	case C_OPCODE_TXF:
	case C_OPCODE_TXQ:
	case C_OPCODE_CONT:
	case C_OPCODE_EMIT:
	case C_OPCODE_ENDPRIM:
	case C_OPCODE_BGNLOOP:
	case C_OPCODE_BGNSUB:
	case C_OPCODE_ENDLOOP:
	case C_OPCODE_ENDSUB:
	case C_OPCODE_NOP:
	case C_OPCODE_CALLNZ:
	case C_OPCODE_IFC:
	case C_OPCODE_BREAKC:
	case C_OPCODE_KIL:
	case C_OPCODE_TEX:
	case C_OPCODE_TXD:
	case C_OPCODE_TXP:
	case C_OPCODE_SWITCH:
	case C_OPCODE_CASE:
	case C_OPCODE_DEFAULT:
	case C_OPCODE_ENDSWITCH:
	default:
		return 0;
	}
}


/* NEW */
void c_node_init(struct c_node *node)
{
	memset(node, 0, sizeof(struct c_node));
	c_list_init(&node->predecessors);
	c_list_init(&node->successors);
	c_list_init(&node->childs);
	c_list_init(&node->insts);
	node->parent = NULL;
}

static struct c_node_link *c_node_link_new(struct c_node *node)
{
	struct c_node_link *link;

	link = calloc(1, sizeof(struct c_node_link));
	if (link == NULL)
		return NULL;
	c_list_init(link);
	link->node = node;
	return link;
}

int c_node_cfg_link(struct c_node *predecessor, struct c_node *successor)
{
	struct c_node_link *pedge, *sedge;

	pedge = c_node_link_new(successor);
	sedge = c_node_link_new(predecessor);
	if (sedge == NULL || pedge == NULL) {
		free(sedge);
		free(pedge);
		return -ENOMEM;
	}
	c_list_add_tail(pedge, &predecessor->successors);
	c_list_add_tail(sedge, &successor->predecessors);
	return 0;
}

int c_node_add_new_instruction_head(struct c_node *node, struct c_instruction *instruction)
{
	struct c_instruction *inst = calloc(1, sizeof(struct c_instruction));

	if (inst == NULL)
		return -ENOMEM;
	memcpy(inst, instruction, sizeof(struct c_instruction));
	c_list_add(inst, &node->insts);
	return 0;
}

int c_node_add_new_instruction(struct c_node *node, struct c_instruction *instruction)
{
	struct c_instruction *inst = calloc(1, sizeof(struct c_instruction));

	if (inst == NULL)
		return -ENOMEM;
	memcpy(inst, instruction, sizeof(struct c_instruction));
	c_list_add_tail(inst, &node->insts);
	return 0;
}

struct c_node *c_shader_cfg_new_node_after(struct c_shader *shader, struct c_node *predecessor)
{
	struct c_node *node = calloc(1, sizeof(struct c_node));

	if (node == NULL)
		return NULL;
	c_node_init(node);
	if (c_node_cfg_link(predecessor, node)) {
		free(node);
		return NULL;
	}
	c_list_add_tail(node, &shader->nodes);
	return node;
}

int c_shader_init(struct c_shader *shader, unsigned type)
{
	unsigned i;
	int r;

	shader->type = type;
	for (i = 0; i < C_FILE_COUNT; i++) {
		shader->files[i].nvectors = 0;
		c_list_init(&shader->files[i].vectors);
	}
	c_list_init(&shader->nodes);
	c_node_init(&shader->entry);
	c_node_init(&shader->end);
	shader->entry.opcode = C_OPCODE_ENTRY;
	shader->end.opcode = C_OPCODE_END;
	r = c_node_cfg_link(&shader->entry, &shader->end);
	if (r)
		return r;
	return 0;
}

struct c_vector *c_shader_vector_new(struct c_shader *shader, unsigned file, unsigned name, int sid)
{
	struct c_vector *v = calloc(1, sizeof(struct c_vector));
	int i;

	if (v == NULL) {
		return NULL;
	}
	for (i = 0; i < 4; i++) {
		v->channel[i] = calloc(1, sizeof(struct c_channel));
		if (v->channel[i] == NULL)
			goto out_err;
		v->channel[i]->vindex = i;
		v->channel[i]->vector = v;
	}
	v->file = file;
	v->name = name;
	v->sid = sid;
	shader->files[v->file].nvectors++;
	v->id = shader->nvectors++;
	c_list_add_tail(v, &shader->files[v->file].vectors);
	return v;
out_err:
	for (i = 0; i < 4; i++) {
		free(v->channel[i]);
	}
	free(v);
	return NULL;
}

static void c_node_remove_link(struct c_node_link *head, struct c_node *node)
{
	struct c_node_link *link, *tmp;

	c_list_for_each_safe(link, tmp, head) {
		if (link->node == node) {
			c_list_del(link);
			free(link);
		}
	}
}

static void c_node_destroy(struct c_node *node)
{
	struct c_instruction *i, *ni;
	struct c_node_link *link, *tmp;

	c_list_for_each_safe(i, ni, &node->insts) {
		c_list_del(i);
		free(i);
	}
	if (node->parent)
		c_node_remove_link(&node->parent->childs, node);
	node->parent = NULL;
	c_list_for_each_safe(link, tmp, &node->predecessors) {
		c_node_remove_link(&link->node->successors, node);
		c_list_del(link);
		free(link);
	}
	c_list_for_each_safe(link, tmp, &node->successors) {
		c_node_remove_link(&link->node->predecessors, node);
		c_list_del(link);
		free(link);
	}
	c_list_for_each_safe(link, tmp, &node->childs) {
		link->node->parent = NULL;
		c_list_del(link);
		free(link);
	}
}

void c_shader_destroy(struct c_shader *shader)
{
	struct c_node *n, *nn;
	struct c_vector *v, *nv;
	unsigned i;

	for (i = 0; i < C_FILE_COUNT; i++) {
		shader->files[i].nvectors = 0;
		c_list_for_each_safe(v, nv, &shader->files[i].vectors) {
			c_list_del(v);
			free(v->channel[0]);
			free(v->channel[1]);
			free(v->channel[2]);
			free(v->channel[3]);
			free(v);
		}
	}
	c_list_for_each_safe(n, nn, &shader->nodes) {
		c_list_del(n);
		c_node_destroy(n);
	}
	memset(shader, 0, sizeof(struct c_shader));
}

static void c_shader_dfs_without_rec(struct c_node *entry, struct c_node *node)
{
	struct c_node_link *link;

	if (entry == node || entry->visited)
		return;
	entry->visited = 1;
	c_list_for_each(link, &entry->successors) {
		c_shader_dfs_without_rec(link->node, node);
	}
}

static void c_shader_dfs_without(struct c_shader *shader, struct c_node *node)
{
	struct c_node *n;

	shader->entry.visited = 0;
	shader->end.visited = 0;
	c_list_for_each(n, &shader->nodes) {
		n->visited = 0;
	}
	c_shader_dfs_without_rec(&shader->entry, node);
}

static int c_shader_build_dominator_tree_rec(struct c_shader *shader, struct c_node *node)
{
	struct c_node_link *link, *nlink;
	unsigned found = 0;
	int r;

	if (node->done)
		return 0;
	node->done = 1;
	c_list_for_each(link, &node->predecessors) {
		/* if we remove this predecessor can we reach the current node ? */
		c_shader_dfs_without(shader, link->node);
		if (node->visited == 0) {
			/* we were unable to visit current node thus current
			 * predecessor  is the immediate dominator of node, as
			 * their can be only one immediate dominator we break
			 */
			node->parent = link->node;
			nlink = c_node_link_new(node);
			if (nlink == NULL)
				return -ENOMEM;
			c_list_add_tail(nlink, &link->node->childs);
			found = 1;
			break;
		}
	}
	/* this shouldn't happen there should at least be 1 denominator for each node */
	if (!found && node->opcode != C_OPCODE_ENTRY) {
		fprintf(stderr, "invalid flow control graph node %p (%d) has no immediate dominator\n",
			node, node->opcode);
		return -EINVAL;
	}
	c_list_for_each(link, &node->predecessors) {
		r = c_shader_build_dominator_tree_rec(shader, link->node);
		if (r)
			return r;
	}
	return 0;
}

int c_shader_build_dominator_tree(struct c_shader *shader)
{
	struct c_node *node;
	c_list_for_each(node, &shader->nodes) {
		node->done = 0;
	}
	return c_shader_build_dominator_tree_rec(shader, &shader->end);
}