summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r--src/mesa/drivers/dri/r600/Makefile2
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c216
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h16
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c18
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c13
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c1790
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h90
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c42
-rw-r--r--src/mesa/drivers/dri/r600/r700_clear.c4
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c183
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_shaderinst.h7
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.c13
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c29
l---------src/mesa/drivers/dri/r600/radeon_bo.c1
l---------src/mesa/drivers/dri/r600/radeon_bo_int_drm.h1
l---------src/mesa/drivers/dri/r600/radeon_cs.c1
l---------src/mesa/drivers/dri/r600/radeon_cs_int_drm.h1
18 files changed, 1185 insertions, 1244 deletions
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 9b7c42042e..26f47b7268 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -14,7 +14,7 @@ EGL_SOURCES = server/radeon_egl.c
endif
ifeq ($(RADEON_LDFLAGS),)
-CS_SOURCES = radeon_cs_space_drm.c
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
COMMON_SOURCES = \
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index d27a3245a3..370bb04f93 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -52,29 +52,49 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_mipmap_tree.h"
#include "radeon_reg.h"
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
+struct r600_cs_manager_legacy
+{
+ struct radeon_cs_manager base;
+ struct radeon_context *ctx;
+ /* hack for scratch stuff */
+ uint32_t pending_age;
+ uint32_t pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+ struct radeon_cs_reloc base;
+ uint32_t cindices;
+ uint32_t *indices;
+ uint32_t *reloc_indices;
+};
-static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
- uint32_t ndw)
+static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
{
- struct radeon_cs *cs;
+ struct radeon_cs_int *csi;
- cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
- if (cs == NULL) {
+ csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
+ if (csi == NULL) {
return NULL;
}
- cs->csm = csm;
- cs->ndw = (ndw + 0x3FF) & (~0x3FF);
- cs->packets = (uint32_t*)malloc(4*cs->ndw);
- if (cs->packets == NULL) {
- free(cs);
+ csi->csm = csm;
+ csi->ndw = (ndw + 0x3FF) & (~0x3FF);
+ csi->packets = (uint32_t*)malloc(4*csi->ndw);
+ if (csi->packets == NULL) {
+ free(csi);
return NULL;
}
- cs->relocs_total_size = 0;
- return cs;
+ csi->relocs_total_size = 0;
+ return csi;
}
-static int r600_cs_write_reloc(struct radeon_cs *cs,
+static int r600_cs_write_reloc(struct radeon_cs_int *csi,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
@@ -83,7 +103,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
struct r600_cs_reloc_legacy *relocs;
int i;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
/* check domains */
if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
/* in one CS a bo can only be in read or write domain but not
@@ -98,7 +118,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
return -EINVAL;
}
/* check if bo is already referenced */
- for(i = 0; i < cs->crelocs; i++) {
+ for(i = 0; i < csi->crelocs; i++) {
uint32_t *indices;
uint32_t *reloc_indices;
@@ -129,109 +149,108 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
}
relocs[i].indices = indices;
relocs[i].reloc_indices = reloc_indices;
- relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
- relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
- cs->section_cdw += 2;
- cs->cdw += 2;
+ relocs[i].indices[relocs[i].cindices - 1] = csi->cdw;
+ relocs[i].reloc_indices[relocs[i].cindices - 1] = csi->cdw;
+ csi->section_cdw += 2;
+ csi->cdw += 2;
return 0;
}
}
/* add bo to reloc */
relocs = (struct r600_cs_reloc_legacy*)
- realloc(cs->relocs,
- sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
+ realloc(csi->relocs,
+ sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1));
if (relocs == NULL) {
return -ENOMEM;
}
- cs->relocs = relocs;
- relocs[cs->crelocs].base.bo = bo;
- relocs[cs->crelocs].base.read_domain = read_domain;
- relocs[cs->crelocs].base.write_domain = write_domain;
- relocs[cs->crelocs].base.flags = flags;
- relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
- relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
- if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
+ csi->relocs = relocs;
+ relocs[csi->crelocs].base.bo = bo;
+ relocs[csi->crelocs].base.read_domain = read_domain;
+ relocs[csi->crelocs].base.write_domain = write_domain;
+ relocs[csi->crelocs].base.flags = flags;
+ relocs[csi->crelocs].indices = (uint32_t*)malloc(4);
+ relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4);
+ if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) )
{
return -ENOMEM;
}
- relocs[cs->crelocs].indices[0] = cs->cdw;
- relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
- cs->section_cdw += 2;
- cs->cdw += 2;
- relocs[cs->crelocs].cindices = 1;
- cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
- cs->crelocs++;
+ relocs[csi->crelocs].indices[0] = csi->cdw;
+ relocs[csi->crelocs].reloc_indices[0] = csi->cdw;
+ csi->section_cdw += 2;
+ csi->cdw += 2;
+ relocs[csi->crelocs].cindices = 1;
+ csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+ csi->crelocs++;
radeon_bo_ref(bo);
return 0;
}
-static int r600_cs_begin(struct radeon_cs *cs,
+static int r600_cs_begin(struct radeon_cs_int *csi,
uint32_t ndw,
const char *file,
const char *func,
int line)
{
- if (cs->section) {
+ if (csi->section_ndw) {
fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
- cs->section_file, cs->section_func, cs->section_line);
+ csi->section_file, csi->section_func, csi->section_line);
fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 1;
- cs->section_ndw = ndw;
- cs->section_cdw = 0;
- cs->section_file = file;
- cs->section_func = func;
- cs->section_line = line;
+ csi->section_ndw = ndw;
+ csi->section_cdw = 0;
+ csi->section_file = file;
+ csi->section_func = func;
+ csi->section_line = line;
- if (cs->cdw + ndw > cs->ndw) {
+ if (csi->cdw + ndw > csi->ndw) {
uint32_t tmp, *ptr;
int num = (ndw > 0x400) ? ndw : 0x400;
- tmp = (cs->cdw + num + 0x3FF) & (~0x3FF);
- ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+ tmp = (csi->cdw + num + 0x3FF) & (~0x3FF);
+ ptr = (uint32_t*)realloc(csi->packets, 4 * tmp);
if (ptr == NULL) {
return -ENOMEM;
}
- cs->packets = ptr;
- cs->ndw = tmp;
+ csi->packets = ptr;
+ csi->ndw = tmp;
}
return 0;
}
-static int r600_cs_end(struct radeon_cs *cs,
+static int r600_cs_end(struct radeon_cs_int *csi,
const char *file,
const char *func,
int line)
{
- if (!cs->section) {
+ if (!csi->section_ndw) {
fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 0;
- if ( cs->section_ndw != cs->section_cdw ) {
+ if ( csi->section_ndw != csi->section_cdw ) {
fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
- cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
- fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
- cs->section_ndw, cs->cdw, cs->section_cdw);
+ csi->section_file, csi->section_func, csi->section_line, csi->section_ndw, csi->section_cdw);
+ fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n",
+ csi->section_ndw, csi->cdw, csi->section_cdw);
fprintf(stderr, "CS section end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
+ csi->section_ndw = 0;
- if (cs->cdw > cs->ndw) {
+ if (csi->cdw > csi->ndw) {
fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
- cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw);
+ csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw);
fprintf(stderr, "CS section end at (%s,%s,%d)\n",
file, func, line);
assert(0);
@@ -240,20 +259,20 @@ static int r600_cs_end(struct radeon_cs *cs,
return 0;
}
-static int r600_cs_process_relocs(struct radeon_cs *cs,
+static int r600_cs_process_relocs(struct radeon_cs_int *csi,
uint32_t * reloc_chunk,
uint32_t * length_dw_reloc_chunk)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i, j, r;
uint32_t offset_dw = 0;
- csm = (struct r600_cs_manager_legacy*)cs->csm;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ csm = (struct r600_cs_manager_legacy*)csi->csm;
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
restart:
- for (i = 0; i < cs->crelocs; i++) {
+ for (i = 0; i < csi->crelocs; i++) {
uint32_t soffset, eoffset;
r = radeon_bo_legacy_validate(relocs[i].base.bo,
@@ -269,9 +288,9 @@ restart:
for (j = 0; j < relocs[i].cindices; j++) {
/* pkt3 nop header in ib chunk */
- cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
+ csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
/* reloc index in ib chunk */
- cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
+ csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
}
/* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
@@ -286,14 +305,14 @@ restart:
return 0;
}
-static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
+static int r600_cs_set_age(struct radeon_cs_int *csi) /* -------------- */
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
- for (i = 0; i < cs->crelocs; i++) {
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
+ for (i = 0; i < csi->crelocs; i++) {
radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
radeon_bo_unref(relocs[i].base.bo);
}
@@ -301,21 +320,21 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
}
#if 0
-static void dump_cmdbuf(struct radeon_cs *cs)
+static void dump_cmdbuf(struct radeon_cs_int *csi)
{
int i;
fprintf(stderr,"--start--\n");
- for (i = 0; i < cs->cdw; i++){
- fprintf(stderr,"0x%08x\n", cs->packets[i]);
+ for (i = 0; i < csi->cdw; i++){
+ fprintf(stderr,"0x%08x\n", csi->packets[i]);
}
fprintf(stderr,"--end--\n");
}
#endif
-static int r600_cs_emit(struct radeon_cs *cs)
+static int r600_cs_emit(struct radeon_cs_int *csi)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct drm_radeon_cs cs_cmd;
struct drm_radeon_cs_chunk cs_chunk[2];
uint32_t length_dw_reloc_chunk;
@@ -329,9 +348,9 @@ static int r600_cs_emit(struct radeon_cs *cs)
csm->pending_count = 1;
- reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4);
+ reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4);
- r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk);
+ r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk);
if (r) {
free(reloc_chunk);
return 0;
@@ -339,8 +358,8 @@ static int r600_cs_emit(struct radeon_cs *cs)
/* raw ib chunk */
cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
- cs_chunk[0].length_dw = cs->cdw;
- cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
+ cs_chunk[0].length_dw = csi->cdw;
+ cs_chunk[0].chunk_data = (unsigned long)(csi->packets);
/* reloc chaunk */
cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
@@ -358,7 +377,7 @@ static int r600_cs_emit(struct radeon_cs *cs)
do
{
- r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
+ r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
retry++;
} while (r == -EAGAIN && retry < 1000);
@@ -369,11 +388,11 @@ static int r600_cs_emit(struct radeon_cs *cs)
csm->pending_age = cs_cmd.cs_id;
- r600_cs_set_age(cs);
+ r600_cs_set_age(csi);
- cs->csm->read_used = 0;
- cs->csm->vram_write_used = 0;
- cs->csm->gart_write_used = 0;
+ csi->csm->read_used = 0;
+ csi->csm->vram_write_used = 0;
+ csi->csm->gart_write_used = 0;
free(reloc_chunk);
@@ -393,35 +412,34 @@ static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
}
}
-static int r600_cs_destroy(struct radeon_cs *cs)
+static int r600_cs_destroy(struct radeon_cs_int *csi)
{
- r600_cs_free_reloc(cs->relocs, cs->crelocs);
- free(cs->relocs);
- free(cs->packets);
- free(cs);
+ r600_cs_free_reloc(csi->relocs, csi->crelocs);
+ free(csi->relocs);
+ free(csi->packets);
+ free(csi);
return 0;
}
-static int r600_cs_erase(struct radeon_cs *cs)
+static int r600_cs_erase(struct radeon_cs_int *csi)
{
- r600_cs_free_reloc(cs->relocs, cs->crelocs);
- free(cs->relocs);
- cs->relocs_total_size = 0;
- cs->relocs = NULL;
- cs->crelocs = 0;
- cs->cdw = 0;
- cs->section = 0;
+ r600_cs_free_reloc(csi->relocs, csi->crelocs);
+ free(csi->relocs);
+ csi->relocs_total_size = 0;
+ csi->relocs = NULL;
+ csi->crelocs = 0;
+ csi->cdw = 0;
return 0;
}
-static int r600_cs_need_flush(struct radeon_cs *cs)
+static int r600_cs_need_flush(struct radeon_cs_int *csi)
{
/* this function used to flush when the BO usage got to
* a certain size, now the higher levels handle this better */
return 0;
}
-static void r600_cs_print(struct radeon_cs *cs, FILE *file)
+static void r600_cs_print(struct radeon_cs_int *csi, FILE *file)
{
}
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
index eba43d37b6..dff0009699 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_IT_SET_CTL_CONST 0x00006F00
#define R600_IT_SURFACE_BASE_UPDATE 0x00007300
-struct r600_cs_manager_legacy
-{
- struct radeon_cs_manager base;
- struct radeon_context *ctx;
- /* hack for scratch stuff */
- uint32_t pending_age;
- uint32_t pending_count;
-};
-
-struct r600_cs_reloc_legacy {
- struct radeon_cs_reloc base;
- uint32_t cindices;
- uint32_t *indices;
- uint32_t *reloc_indices;
-};
-
struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
/**
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 25314eff56..45bbc3c071 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -74,7 +74,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
-//#define R600_ENABLE_GLSL_TEST 1
+#define R600_ENABLE_GLSL_TEST 1
#define need_GL_VERSION_2_0
#define need_GL_ARB_occlusion_query
@@ -317,20 +317,8 @@ static void r600InitGLExtensions(GLcontext *ctx)
#ifdef R600_ENABLE_GLSL_TEST
driInitExtensions(ctx, gl_20_extension, GL_TRUE);
- //_mesa_enable_2_0_extensions(ctx);
- //1.5
- ctx->Extensions.ARB_occlusion_query = GL_TRUE;
- ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE;
- ctx->Extensions.EXT_shadow_funcs = GL_TRUE;
- //2.0
- ctx->Extensions.ARB_draw_buffers = GL_TRUE;
- ctx->Extensions.ARB_point_sprite = GL_TRUE;
- ctx->Extensions.ARB_shader_objects = GL_TRUE;
- ctx->Extensions.ARB_vertex_shader = GL_TRUE;
- ctx->Extensions.ARB_fragment_shader = GL_TRUE;
- ctx->Extensions.EXT_blend_equation_separate = GL_TRUE;
- ctx->Extensions.ATI_separate_stencil = GL_TRUE;
-
+ _mesa_enable_2_0_extensions(ctx);
+
/* glsl compiler has problem if this is not GL_TRUE */
ctx->Shader.EmitCondCodes = GL_TRUE;
#endif /* R600_ENABLE_GLSL_TEST */
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 4ec315b78c..2a4a6e6ee1 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -917,18 +917,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
return;
}
- radeon_update_renderbuffers(pDRICtx, dPriv);
- /* back & depth buffer are useless free them right away */
- rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
- if (rb && rb->bo) {
- radeon_bo_unref(rb->bo);
- rb->bo = NULL;
- }
- rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
- if (rb && rb->bo) {
- radeon_bo_unref(rb->bo);
- rb->bo = NULL;
- }
+ radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
rb = rfb->color_rb[0];
if (rb->bo == NULL) {
/* Failed to BO for the buffer */
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 309c90fdd0..51692a11ff 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -32,6 +32,7 @@
#include "main/mtypes.h"
#include "main/imports.h"
+#include "shader/prog_parameter.h"
#include "radeon_debug.h"
#include "r600_context.h"
@@ -41,6 +42,39 @@
#define USE_CF_FOR_CONTINUE_BREAK 1
#define USE_CF_FOR_POP_AFTER 1
+struct prog_instruction noise1_insts[12] = {
+ {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
+};
+float noise1_const[2][4] = {
+ {0.300000f, 0.900000f, 0.500000f, 0.300000f}
+};
+
+COMPILED_SUB noise1_presub = {
+ &(noise1_insts[0]),
+ 12,
+ 2,
+ 1,
+ 0,
+ &(noise1_const[0]),
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ {0,0,0},
+ 0
+};
+
BITS addrmode_PVSDST(PVSDST * pPVSDST)
{
return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
@@ -330,14 +364,14 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
return(format);
}
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
{
- if(pAsm->D.dst.op3)
+ if(nIsOp3 > 0)
{
return 3;
}
- switch (pAsm->D.dst.opcode)
+ switch (opcode)
{
case SQ_OP2_INST_ADD:
case SQ_OP2_INST_KILLE:
@@ -378,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
return 1;
default: radeon_error(
- "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
+ "Need instruction operand number for %x.\n", opcode);
};
return 3;
@@ -500,12 +534,20 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
pAsm->unCFflags = 0;
+ pAsm->presubs = NULL;
+ pAsm->unPresubArraySize = 0;
+ pAsm->unNumPresub = 0;
+ pAsm->unCurNumILInsts = 0;
+
+ pAsm->unVetTexBits = 0;
+
return 0;
}
GLboolean IsTex(gl_inst_opcode Opcode)
{
- if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+ if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
+ (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
{
return GL_TRUE;
}
@@ -1220,7 +1262,15 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
}
pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
- pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ if(pILInst->SrcReg[src].Index < 0)
+ {
+ WARN_ONCE("Negative register offsets not supported yet!\n");
+ pAsm->S[fld].src.reg = 0;
+ }
+ else
+ {
+ pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ }
break;
case PROGRAM_INPUT:
setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
@@ -1292,6 +1342,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm)
pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+ if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
+ {
+ pAsm->D2.dst2.SaturateMode = 1;
+ }
+ else
+ {
+ pAsm->D2.dst2.SaturateMode = 0;
+ }
+
return GL_TRUE;
}
@@ -1364,43 +1423,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
break;
case PROGRAM_INPUT:
- switch (pILInst->SrcReg[0].Index)
+ if(SPT_VP == pAsm->currentShaderType)
{
- case FRAG_ATTRIB_WPOS:
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
- case FRAG_ATTRIB_FOGC:
- case FRAG_ATTRIB_TEX0:
- case FRAG_ATTRIB_TEX1:
- case FRAG_ATTRIB_TEX2:
- case FRAG_ATTRIB_TEX3:
- case FRAG_ATTRIB_TEX4:
- case FRAG_ATTRIB_TEX5:
- case FRAG_ATTRIB_TEX6:
- case FRAG_ATTRIB_TEX7:
- bValidTexCoord = GL_TRUE;
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case VERT_ATTRIB_TEX0:
+ case VERT_ATTRIB_TEX1:
+ case VERT_ATTRIB_TEX2:
+ case VERT_ATTRIB_TEX3:
+ case VERT_ATTRIB_TEX4:
+ case VERT_ATTRIB_TEX5:
+ case VERT_ATTRIB_TEX6:
+ case VERT_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ }
+ }
+ else
+ {
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case FRAG_ATTRIB_WPOS:
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_FOGC:
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ case FRAG_ATTRIB_FACE:
+ fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+ break;
+ case FRAG_ATTRIB_PNTC:
+ fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+ break;
+ }
+
+ if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+ (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+ {
+ bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
pAsm->S[0].src.rtype = SRC_REG_INPUT;
- break;
- case FRAG_ATTRIB_FACE:
- fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
- break;
- case FRAG_ATTRIB_PNTC:
- fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
- break;
- }
-
- if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
- (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
- {
- bValidTexCoord = GL_TRUE;
- pAsm->S[0].src.reg =
- pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
- pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ }
}
- break;
+ break;
}
}
@@ -1445,8 +1526,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+ tex_instruction_ptr->m_Word0.f.alt_const = 0;
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ }
tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
if (normalized) {
@@ -1465,7 +1555,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
-
tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
// dst
@@ -1724,7 +1813,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
}
else
{
- pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
}
// If this clause constains any instruction that is forward dependent on a TEX instruction,
@@ -2001,7 +2090,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm,
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
@@ -2090,7 +2179,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
@@ -2159,6 +2248,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
+ R700ALUInstruction * alu_instruction_ptr;
+ R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
+ R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
+
GLuint number_of_scalar_operations;
GLboolean is_single_scalar_operation;
GLuint scalar_channel_index;
@@ -2167,7 +2260,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
int current_source_index;
GLuint contiguous_slots_needed;
- GLuint uNumSrc = r700GetNumOperands(pAsm);
+ GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
//GLuint channel_swizzle, j;
//GLuint chan_counter[4] = {0, 0, 0, 0};
//PVSSRC * pSource[3];
@@ -2224,262 +2317,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
contiguous_slots_needed = 0;
- if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+ if(!is_single_scalar_operation)
{
contiguous_slots_needed = 4;
}
+ contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
+
initialize(pAsm);
for (scalar_channel_index=0;
scalar_channel_index < number_of_scalar_operations;
scalar_channel_index++)
{
- R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
- if (alu_instruction_ptr == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstruction(alu_instruction_ptr);
-
- //src 0
- current_source_index = 0;
- pcurrent_source = &(pAsm->S[0].src);
-
- if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
- current_source_index,
- pcurrent_source,
- scalar_channel_index) )
- {
- return GL_FALSE;
- }
-
- if (uNumSrc > 1)
- {
- // Process source 1
- current_source_index = 1;
- pcurrent_source = &(pAsm->S[current_source_index].src);
-
- if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
- current_source_index,
- pcurrent_source,
- scalar_channel_index) )
- {
- return GL_FALSE;
- }
- }
-
- //other bits
- alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
-
- if( (is_single_scalar_operation == GL_TRUE)
- || (GL_TRUE == bSplitInst) )
- {
- alu_instruction_ptr->m_Word0.f.last = 1;
- }
- else
- {
- alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
- }
-
- alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
-
- // dst
- if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
- (pAsm->D.dst.rtype == DST_REG_OUT) )
- {
- alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
- }
- else
- {
- radeon_error("Only temp destination registers supported for ALU dest regs.\n");
- return GL_FALSE;
- }
-
- alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
-
- if ( is_single_scalar_operation == GL_TRUE )
- {
- // Override scalar_channel_index since only one scalar value will be written
- if(pAsm->D.dst.writex)
- {
- scalar_channel_index = 0;
- }
- else if(pAsm->D.dst.writey)
- {
- scalar_channel_index = 1;
- }
- else if(pAsm->D.dst.writez)
- {
- scalar_channel_index = 2;
- }
- else if(pAsm->D.dst.writew)
- {
- scalar_channel_index = 3;
- }
- }
-
- alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
-
- alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
-
- if (pAsm->D.dst.op3)
- {
- //op3
-
- alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
-
- //There's 3rd src for op3
- current_source_index = 2;
- pcurrent_source = &(pAsm->S[current_source_index].src);
-
- if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
- current_source_index,
- pcurrent_source,
- scalar_channel_index) )
- {
- return GL_FALSE;
- }
- }
- else
+ if(scalar_channel_index == (number_of_scalar_operations-1))
{
- //op2
- if (pAsm->bR6xx)
+ switch(pAsm->D2.dst2.literal_slots)
{
- alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
-
- alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
-
- //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
- //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
- switch (scalar_channel_index)
- {
- case 0:
- alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
- break;
- case 1:
- alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
- break;
- case 2:
- alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
- break;
- case 3:
- alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
- break;
- default:
- alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
- break;
- }
- alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
- }
- else
- {
- alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
-
- alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
-
- //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
- //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
- switch (scalar_channel_index)
- {
- case 0:
- alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
- break;
- case 1:
- alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
- break;
- case 2:
- alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
- break;
- case 3:
- alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
- break;
- default:
- alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
- break;
- }
- alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
- }
- }
-
- if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
- {
- return GL_FALSE;
- }
-
- /*
- * Judge the type of current instruction, is it vector or scalar
- * instruction.
- */
- if (is_single_scalar_operation)
- {
- if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
- {
- return GL_FALSE;
- }
+ case 0:
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
+ break;
+ case 1:
+ alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
+ Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
+ alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
+ break;
+ case 2:
+ alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
+ Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
+ alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
+ break;
+ };
}
-
- contiguous_slots_needed = 0;
- }
-
- return GL_TRUE;
-}
-
-GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
-{
- GLuint number_of_scalar_operations;
- GLboolean is_single_scalar_operation;
- GLuint scalar_channel_index;
-
- PVSSRC * pcurrent_source;
- int current_source_index;
- GLuint contiguous_slots_needed;
-
- GLuint uNumSrc = r700GetNumOperands(pAsm);
-
- GLboolean bSplitInst = GL_FALSE;
-
- if (1 == pAsm->D.dst.math)
- {
- is_single_scalar_operation = GL_TRUE;
- number_of_scalar_operations = 1;
- }
- else
- {
- is_single_scalar_operation = GL_FALSE;
- number_of_scalar_operations = 4;
- }
-
- contiguous_slots_needed = 0;
-
- if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
- {
- contiguous_slots_needed = 4;
- }
-
- initialize(pAsm);
-
- for (scalar_channel_index=0;
- scalar_channel_index < number_of_scalar_operations;
- scalar_channel_index++)
- {
- R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
- if (alu_instruction_ptr == NULL)
+ else
{
- return GL_FALSE;
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
}
- Init_R700ALUInstruction(alu_instruction_ptr);
//src 0
current_source_index = 0;
@@ -2489,7 +2364,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
@@ -2503,13 +2378,13 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
}
//other bits
- alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+ alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
if( (is_single_scalar_operation == GL_TRUE)
|| (GL_TRUE == bSplitInst) )
@@ -2524,15 +2399,15 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
if(1 == pAsm->D.dst.predicated)
{
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
}
else
{
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
}
-
+
// dst
if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
(pAsm->D.dst.rtype == DST_REG_OUT) )
@@ -2540,7 +2415,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
}
else
- {
+ {
radeon_error("Only temp destination registers supported for ALU dest regs.\n");
return GL_FALSE;
}
@@ -2597,8 +2472,8 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
{
alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
+ alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
//alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
//alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
@@ -2626,8 +2501,8 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
{
alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
+ alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
//alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
//alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
@@ -2654,7 +2529,7 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
}
if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
- {
+ {
return GL_FALSE;
}
@@ -2665,272 +2540,19 @@ GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
if (is_single_scalar_operation)
{
if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
- {
+ {
return GL_FALSE;
}
}
else
{
if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
- {
+ {
return GL_FALSE;
}
}
- contiguous_slots_needed = 0;
- }
-
- return GL_TRUE;
-}
-
-GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
-{
- R700ALUInstruction * alu_instruction_ptr;
- R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
- R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
-
- GLuint number_of_scalar_operations;
- GLboolean is_single_scalar_operation;
- GLuint scalar_channel_index;
-
- GLuint contiguous_slots_needed;
- GLuint lastInstruction;
- GLuint not_masked[4];
-
- GLuint uNumSrc = r700GetNumOperands(pAsm);
-
- GLboolean bSplitInst = GL_FALSE;
-
- number_of_scalar_operations = 0;
- contiguous_slots_needed = 0;
-
- if(1 == pAsm->D.dst.writew)
- {
- lastInstruction = 3;
- number_of_scalar_operations++;
- not_masked[3] = 1;
- }
- else
- {
- not_masked[3] = 0;
- }
- if(1 == pAsm->D.dst.writez)
- {
- lastInstruction = 2;
- number_of_scalar_operations++;
- not_masked[2] = 1;
- }
- else
- {
- not_masked[2] = 0;
- }
- if(1 == pAsm->D.dst.writey)
- {
- lastInstruction = 1;
- number_of_scalar_operations++;
- not_masked[1] = 1;
- }
- else
- {
- not_masked[1] = 0;
- }
- if(1 == pAsm->D.dst.writex)
- {
- lastInstruction = 0;
- number_of_scalar_operations++;
- not_masked[0] = 1;
- }
- else
- {
- not_masked[0] = 0;
- }
-
- if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
- {
- contiguous_slots_needed = 4;
- }
- else
- {
- contiguous_slots_needed = number_of_scalar_operations;
- }
-
- if(1 == pAsm->D2.dst2.literal)
- {
- contiguous_slots_needed += 1;
- }
- else if(2 == pAsm->D2.dst2.literal)
- {
- contiguous_slots_needed += 2;
- }
-
- initialize(pAsm);
-
- for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++)
- {
- if(0 == not_masked[scalar_channel_index])
- {
- continue;
- }
-
- if(scalar_channel_index == lastInstruction)
- {
- switch (pAsm->D2.dst2.literal)
- {
- case 0:
- alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
- if (alu_instruction_ptr == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstruction(alu_instruction_ptr);
- break;
- case 1:
- alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
- if (alu_instruction_ptr_hl == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]);
- alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
- break;
- case 2:
- alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
- if (alu_instruction_ptr_fl == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]);
- alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
- break;
- default:
- break;
- };
- }
- else
- {
- alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
- if (alu_instruction_ptr == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstruction(alu_instruction_ptr);
- }
-
- //src 0
- if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
- 0,
- &(pAsm->S[0].src),
- scalar_channel_index) )
- {
- return GL_FALSE;
- }
-
- if (uNumSrc > 1)
- {
- // Process source 1
- if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
- 1,
- &(pAsm->S[1].src),
- scalar_channel_index) )
- {
- return GL_FALSE;
- }
- }
-
- //other bits
- alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
-
- if(scalar_channel_index == lastInstruction)
- {
- alu_instruction_ptr->m_Word0.f.last = 1;
- }
-
- alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
- if(1 == pAsm->D.dst.predicated)
- {
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
- }
- else
- {
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0;
- }
-
- // dst
- if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
- (pAsm->D.dst.rtype == DST_REG_OUT) )
- {
- alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
- }
- else
- {
- radeon_error("Only temp destination registers supported for ALU dest regs.\n");
- return GL_FALSE;
- }
-
- alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
-
- alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
-
- alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
-
- if (pAsm->D.dst.op3)
- {
- //op3
- alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
-
- //There's 3rd src for op3
- if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
- 2,
- &(pAsm->S[2].src),
- scalar_channel_index) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- //op2
- if (pAsm->bR6xx)
- {
- alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1;
- alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
- }
- else
- {
- alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1;
- alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
- }
- }
-
- if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
- {
- return GL_FALSE;
- }
-
- if (1 == number_of_scalar_operations)
- {
- if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
- {
- return GL_FALSE;
- }
- }
-
- contiguous_slots_needed -= 2;
+ contiguous_slots_needed -= 1;
}
return GL_TRUE;
@@ -2987,69 +2609,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
pAsm->S[2].bits = 0;
pAsm->is_tex = GL_FALSE;
pAsm->need_tex_barrier = GL_FALSE;
-
- return GL_TRUE;
-}
-
-GLboolean next_ins2(r700_AssemblerBase *pAsm)
-{
- struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
-
- //ALU
- if( GL_FALSE == assemble_alu_instruction2(pAsm) )
- {
- radeon_error("Error assembling ALU instruction\n");
- return GL_FALSE;
- }
-
- if(pAsm->D.dst.rtype == DST_REG_OUT)
- {
- if(pAsm->D.dst.op3)
- {
- // There is no mask for OP3 instructions, so all channels are written
- pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
- }
- else
- {
- pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
- |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
- }
- }
-
- //reset for next inst.
- pAsm->D.bits = 0;
- pAsm->D2.bits = 0;
- pAsm->S[0].bits = 0;
- pAsm->S[1].bits = 0;
- pAsm->S[2].bits = 0;
- pAsm->is_tex = GL_FALSE;
- pAsm->need_tex_barrier = GL_FALSE;
-
pAsm->D2.bits = 0;
-
- return GL_TRUE;
-}
-
-/* not work yet */
-GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
-{
- struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
-
- //ALU
- if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) )
- {
- radeon_error("Error assembling ALU instruction\n");
- return GL_FALSE;
- }
-
- //reset for next inst.
- pAsm->D.bits = 0;
- pAsm->D2.bits = 0;
- pAsm->S[0].bits = 0;
- pAsm->S[1].bits = 0;
- pAsm->S[2].bits = 0;
- pAsm->is_tex = GL_FALSE;
- pAsm->need_tex_barrier = GL_FALSE;
+ pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
return GL_TRUE;
}
@@ -3282,9 +2843,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_COS(r700_AssemblerBase *pAsm)
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
- return assemble_math_function(pAsm, SQ_OP2_INST_COS);
+ int tmp;
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ assemble_src(pAsm, 0, -1);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
+ next_ins(pAsm);
+
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
+
+ assemble_dst(pAsm);
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ next_ins(pAsm);
+
+ //TODO - replicate if more channels set in WriteMask
+ return GL_TRUE;
+
}
GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
@@ -3554,10 +3150,13 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
{
- checkop2(pAsm);
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ if(pILInst->Opcode == OPCODE_KIL)
+ checkop1(pAsm);
pAsm->D.dst.opcode = opcode;
- pAsm->D.dst.math = 1;
+ //pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -3567,17 +3166,30 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
pAsm->D.dst.writez = 0;
pAsm->D.dst.writew = 0;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = 0;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if(pILInst->Opcode == OPCODE_KIL_NV)
{
- return GL_FALSE;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = 0;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
+ neg_PVSSRC(&(pAsm->S[1].src));
}
-
- if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ else
{
- return GL_FALSE;
+ if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ {
+ return GL_FALSE;
+ }
+
}
-
- if ( GL_FALSE == next_ins2(pAsm) )
+
+ if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
@@ -4384,77 +3996,67 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
}
-GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
-{
- return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
-}
-
GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
{
BITS tmp;
- checkop1(pAsm);
-
- tmp = gethelpr(pAsm);
-
- // COS tmp.x, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_COS;
- pAsm->D.dst.math = 1;
+ checkop1(pAsm);
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
+ tmp = gethelpr(pAsm);
+ /* tmp.x = src /2*PI */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ assemble_src(pAsm, 0, -1);
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
- // SIN tmp.y, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
- pAsm->D.dst.math = 1;
+ next_ins(pAsm);
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writey = 1;
+ // COS dst.x, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ pAsm->D.dst.math = 1;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ assemble_dst(pAsm);
+ /* mask y */
+ pAsm->D.dst.writey = 0;
- if( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- // MOV dst.mask, tmp
- pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
- if( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ // SIN dst.y, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ pAsm->D.dst.math = 1;
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp;
+ assemble_dst(pAsm);
+ /* mask x */
+ pAsm->D.dst.writex = 0;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[0].src.swizzlez = SQ_SEL_0;
- pAsm->S[0].src.swizzlew = SQ_SEL_0;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
return GL_TRUE;
}
@@ -4467,7 +4069,7 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
}
pAsm->D.dst.opcode = opcode;
- pAsm->D.dst.math = 1;
+ //pAsm->D.dst.math = 1;
if( GL_FALSE == assemble_dst(pAsm) )
{
@@ -4494,32 +4096,34 @@ GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
{
- if( GL_FALSE == checkop2(pAsm) )
- {
- return GL_FALSE;
- }
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
pAsm->D.dst.opcode = opcode;
pAsm->D.dst.math = 1;
pAsm->D.dst.predicated = 1;
- pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
- if( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->uHelpReg;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
+ pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
+ noneg_PVSSRC(&(pAsm->S[0].src));
- if( GL_FALSE == assemble_src(pAsm, 1, -1) )
- {
- return GL_FALSE;
- }
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = pAsm->uHelpReg;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_0;
+ pAsm->S[1].src.swizzley = SQ_SEL_0;
+ pAsm->S[1].src.swizzlez = SQ_SEL_0;
+ pAsm->S[1].src.swizzlew = SQ_SEL_0;
- if( GL_FALSE == next_ins2(pAsm) )
+ if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
@@ -4626,22 +4230,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
need_barrier = GL_TRUE;
}
- switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
- {
- case OPCODE_TEX:
- break;
- case OPCODE_TXB:
- radeon_error("do not support TXB yet\n");
- return GL_FALSE;
- break;
- case OPCODE_TXP:
- break;
- default:
- radeon_error("Internal error: bad texture op (not TEX)\n");
- return GL_FALSE;
- break;
- }
-
if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
{
GLuint tmp = gethelpr(pAsm);
@@ -4719,24 +4307,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
- * have to do explicit instruction
- */
- pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp1;
- pAsm->D.dst.writez = 1;
-
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp1;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[1].bits = pAsm->S[0].bits;
- flipneg_PVSSRC(&(pAsm->S[1].src));
-
- next_ins(pAsm);
-
/* tmp1.z = RCP_e(|tmp1.z|) */
pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
pAsm->D.dst.math = 1;
@@ -4749,13 +4319,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp1;
pAsm->S[0].src.swizzlex = SQ_SEL_Z;
+ pAsm->S[0].src.abs = 1;
next_ins(pAsm);
/* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
* MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
* muladd has no writemask, have to use another temp
- * also no support for imm constants, so add 1 here
*/
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
pAsm->D.dst.op3 = 1;
@@ -4772,30 +4342,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[1].src.reg = tmp1;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
- pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+ /* immediate c 1.5 */
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1.5F;
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
pAsm->S[2].src.reg = tmp1;
- setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
-
- next_ins(pAsm);
-
- /* ADD the remaining .5 */
- pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp2;
- pAsm->D.dst.writex = 1;
- pAsm->D.dst.writey = 1;
- pAsm->D.dst.writez = 0;
- pAsm->D.dst.writew = 0;
-
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp2;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
- pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
- noswizzle_PVSSRC(&(pAsm->S[1].src));
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
next_ins(pAsm);
@@ -4820,14 +4372,32 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ {
+ case OPCODE_DDX:
+ /* will these need WQM(1) on CF inst ? */
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+ break;
+ case OPCODE_DDY:
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+ break;
+ case OPCODE_TXB:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+ break;
+ default:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ }
+
+ pAsm->is_tex = GL_TRUE;
+ if ( GL_TRUE == need_barrier )
+
pAsm->is_tex = GL_TRUE;
if ( GL_TRUE == need_barrier )
{
pAsm->need_tex_barrier = GL_TRUE;
}
// Set src1 to tex unit id
- pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
+ pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
//No sw info from mesa compiler, so hard code here.
@@ -5103,6 +4673,11 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
{
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+
if(GL_FALSE == add_cf_instruction(pAsm) )
{
return GL_FALSE;
@@ -5247,6 +4822,11 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
{
#ifdef USE_CF_FOR_CONTINUE_BREAK
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
unsigned int unFCSP;
for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
{
@@ -5313,6 +4893,10 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
{
#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
unsigned int unFCSP;
for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
{
@@ -5471,7 +5055,7 @@ void add_return_inst(r700_AssemblerBase *pAsm)
pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
}
-GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
{
/* Put in sub */
if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
@@ -5486,7 +5070,7 @@ GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
pAsm->unSubArraySize += 10;
}
- pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
+ pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
@@ -5577,9 +5161,13 @@ GLboolean assemble_RET(r700_AssemblerBase *pAsm)
GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
GLint nILindex,
+ GLuint uiIL_Shift,
GLuint uiNumberInsts,
- struct prog_instruction *pILInst)
+ struct prog_instruction *pILInst,
+ PRESUB_DESC * pPresubDesc)
{
+ GLint uiIL_Offset;
+
pAsm->alu_x_opcode = SQ_CF_INST_ALU;
if(GL_FALSE == add_cf_instruction(pAsm) )
@@ -5612,8 +5200,12 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
pAsm->unCallerArraySize += 10;
}
- pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
- pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
+ uiIL_Offset = nILindex + uiIL_Shift;
+ pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
+ pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
+
+ pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
+ pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
pAsm->unCallerArrayPointer++;
@@ -5623,7 +5215,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
GLboolean bRet;
for(j=0; j<pAsm->unSubArrayPointer; j++)
{
- if(nILindex == pAsm->subs[j].subIL_Offset)
+ if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
{ /* compiled before */
max = pAsm->subs[j].unStackDepthMax
@@ -5641,7 +5233,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
unSubID = pAsm->unSubArrayPointer;
- bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
+ bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
if(GL_TRUE == bRet)
{
@@ -5651,6 +5243,8 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
{
pAsm->CALLSTACK[pAsm->CALLSP].max = max;
}
+
+ pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
}
return bRet;
@@ -5668,11 +5262,12 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
pAsm->D.dst.writey = 0;
pAsm->D.dst.writez = 0;
pAsm->D.dst.writew = 0;
- pAsm->D2.dst2.literal = 1;
+ pAsm->D2.dst2.literal_slots = 1;
pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
pAsm->D.dst.predicated = 0;
/* in reloc where dislink flag init inst, only one slot alu inst is handled. */
pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
+ pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
#if 0
pAsm->S[0].src.rtype = SRC_REC_LITERAL;
//pAsm->S[0].src.reg = 0;
@@ -5697,7 +5292,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
pAsm->S[0].src.swizzlez = flagValue;
pAsm->S[0].src.swizzlew = flagValue;
- if( GL_FALSE == next_ins2(pAsm) )
+ if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
@@ -5722,9 +5317,10 @@ GLboolean testFlag(r700_AssemblerBase *pAsm)
pAsm->D.dst.writey = 0;
pAsm->D.dst.writez = 0;
pAsm->D.dst.writew = 0;
- pAsm->D2.dst2.literal = 1;
+ pAsm->D2.dst2.literal_slots = 1;
pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
pAsm->D.dst.predicated = 1;
+ pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
pAsm->S[0].src.reg = pAsm->flag_reg_index;
@@ -5758,7 +5354,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm)
pAsm->S[1].src.swizzlez = SQ_SEL_1;
pAsm->S[1].src.swizzlew = SQ_SEL_1;
- if( GL_FALSE == next_ins2(pAsm) )
+ if( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
@@ -5814,6 +5410,7 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
}
GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiIL_Shift,
GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode)
@@ -5853,6 +5450,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
}
}
#endif
+ if(pILInst[i].CondUpdate == 1)
+ {
+ /* remember dest register used for cond evaluation */
+ /* XXX also handle PROGRAM_OUTPUT registers here? */
+ pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
+ }
switch (pILInst[i].Opcode)
{
@@ -5881,7 +5484,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
return GL_FALSE;
break;
case OPCODE_COS:
- if ( GL_FALSE == assemble_COS(pR700AsmCode) )
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
return GL_FALSE;
break;
@@ -5923,9 +5526,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
case OPCODE_KIL:
case OPCODE_KIL_NV:
- /* done at OPCODE_SE/SGT...etc. */
- /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
- return GL_FALSE; */
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
+ return GL_FALSE;
break;
case OPCODE_LG2:
if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
@@ -5964,6 +5566,26 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
case OPCODE_MUL:
if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
return GL_FALSE;
+ break;
+
+ case OPCODE_NOISE1:
+ {
+ callPreSub(pR700AsmCode,
+ GLSL_NOISE1,
+ &noise1_presub,
+ pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
+ 1);
+ radeon_error("noise1: not yet supported shader instruction\n");
+ };
+ break;
+ case OPCODE_NOISE2:
+ radeon_error("noise2: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE3:
+ radeon_error("noise3: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE4:
+ radeon_error("noise4: not yet supported shader instruction\n");
break;
case OPCODE_POW:
@@ -5979,7 +5601,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
return GL_FALSE;
break;
case OPCODE_SIN:
- if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
return GL_FALSE;
break;
case OPCODE_SCS:
@@ -5988,151 +5610,23 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
break;
case OPCODE_SEQ:
- if(OPCODE_IF == pILInst[i+1].Opcode)
- {
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_BRK == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_CONT == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
- {
- return GL_FALSE;
- }
- }
- else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
- {
- if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) )
- {
- return GL_FALSE;
- }
- }
- else
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
{
- if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
- {
- return GL_FALSE;
- }
+ return GL_FALSE;
}
break;
case OPCODE_SGT:
- if(OPCODE_IF == pILInst[i+1].Opcode)
- {
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
{
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_CONT == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
-#endif
-
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
- {
- return GL_FALSE;
- }
- }
- else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
- {
- if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
- {
- return GL_FALSE;
- }
+ return GL_FALSE;
}
break;
case OPCODE_SGE:
- if(OPCODE_IF == pILInst[i+1].Opcode)
- {
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_BRK == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_CONT == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
-#endif
-
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
- {
- return GL_FALSE;
- }
- }
- else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
- {
- if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
- {
- return GL_FALSE;
- }
+ if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
+ {
+ return GL_FALSE;
}
break;
@@ -6144,61 +5638,12 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
SrcRegSave[1] = pILInst[i].SrcReg[1];
pILInst[i].SrcReg[0] = SrcRegSave[1];
pILInst[i].SrcReg[1] = SrcRegSave[0];
- if(OPCODE_IF == pILInst[i+1].Opcode)
- {
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
- else if(OPCODE_BRK == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
- else if(OPCODE_CONT == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
-#endif
-
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
- else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
{
- if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
- {
- return GL_FALSE;
- }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
}
- else
- {
- if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
pILInst[i].SrcReg[0] = SrcRegSave[0];
pILInst[i].SrcReg[1] = SrcRegSave[1];
}
@@ -6211,60 +5656,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
SrcRegSave[1] = pILInst[i].SrcReg[1];
pILInst[i].SrcReg[0] = SrcRegSave[1];
pILInst[i].SrcReg[1] = SrcRegSave[0];
- if(OPCODE_IF == pILInst[i+1].Opcode)
- {
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
- else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
{
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
- else if(OPCODE_CONT == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
-#endif
-
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
- }
- else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
- {
- if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
- {
- pILInst[i].SrcReg[0] = SrcRegSave[0];
- pILInst[i].SrcReg[1] = SrcRegSave[1];
- return GL_FALSE;
- }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
}
pILInst[i].SrcReg[0] = SrcRegSave[0];
pILInst[i].SrcReg[1] = SrcRegSave[1];
@@ -6272,51 +5668,9 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
break;
case OPCODE_SNE:
- if(OPCODE_IF == pILInst[i+1].Opcode)
- {
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_BRK == pILInst[i+1].Opcode)
- {
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
- {
- return GL_FALSE;
- }
- }
- else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
{
-#ifdef USE_CF_FOR_CONTINUE_BREAK
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
-#else
- pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
-#endif
- if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
- {
- return GL_FALSE;
- }
- }
- else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
- {
- if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) )
- {
- return GL_FALSE;
- }
- }
- else
- {
- if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
- {
- return GL_FALSE;
- }
+ return GL_FALSE;
}
break;
@@ -6344,7 +5698,8 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
}
}
break;
-
+ case OPCODE_DDX:
+ case OPCODE_DDY:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
@@ -6417,7 +5772,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
break;
case OPCODE_BGNSUB:
- if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
+ if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
{
return GL_FALSE;
}
@@ -6432,9 +5787,11 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
case OPCODE_CAL:
if( GL_FALSE == assemble_CAL(pR700AsmCode,
- pILInst[i].BranchTarget,
+ pILInst[i].BranchTarget,
+ uiIL_Shift,
uiNumberInsts,
- pILInst) )
+ pILInst,
+ NULL) )
{
return GL_FALSE;
}
@@ -6471,7 +5828,7 @@ GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
return GL_TRUE;
}
-GLboolean RelocProgram(r700_AssemblerBase * pAsm)
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
{
GLuint i;
GLuint unCFoffset;
@@ -6481,6 +5838,12 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
R700ShaderInstruction * pInst;
R700ControlFlowGenericClause * pCFInst;
+ R700ControlFlowALUClause * pCF_ALU;
+ R700ALUInstruction * pALU;
+ GLuint unConstOffset = 0;
+ GLuint unRegOffset;
+ GLuint unMinRegIndex;
+
plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
/* remove flags init if they are not used */
@@ -6526,6 +5889,11 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
unCFoffset = plstCFmain->uNumOfNode;
+ if(NULL != pILProg->Parameters)
+ {
+ unConstOffset = pILProg->Parameters->NumParameters;
+ }
+
/* Reloc subs */
for(i=0; i<pAsm->unSubArrayPointer; i++)
{
@@ -6563,6 +5931,84 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
pInst = pInst->pNextInst;
};
+ if(NULL != pAsm->subs[i].pPresubDesc)
+ {
+ GLuint uNumSrc;
+
+ unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
+ unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
+
+ pInst = plstCFsub->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+
+ if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src0_sel += unConstOffset;
+ }
+
+ if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
+ >= SQ_OP3_INST_MUL_LIT )
+ { /* op3 : 3 srcs */
+ if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
+ }
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ else
+ {
+ if(pAsm->bR6xx)
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+ }
+ else
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+ }
+ if(2 == uNumSrc)
+ { /* 2 srcs */
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ }
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
/* Put sub into main */
plstCFmain->pTail->pNextInst = plstCFsub->pHead;
plstCFmain->pTail = plstCFsub->pTail;
@@ -6576,11 +6022,216 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm)
{
pAsm->callers[i].cf_ptr->m_Word0.f.addr
= pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
+
+ if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
+ {
+ unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
+
+ if(NULL != pAsm->callers[i].prelude_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ if(NULL != pAsm->callers[i].finale_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ }
}
return GL_TRUE;
}
+GLboolean callPreSub(r700_AssemblerBase* pAsm,
+ LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+ COMPILED_SUB * pCompiledSub,
+ GLshort uOutReg,
+ GLshort uNumValidSrc)
+{
+ /* save assemble context */
+ GLuint starting_temp_register_number_save;
+ GLuint number_used_registers_save;
+ GLuint uFirstHelpReg_save;
+ GLuint uHelpReg_save;
+ GLuint uiCurInst_save;
+ struct prog_instruction *pILInst_save;
+ PRESUB_DESC * pPresubDesc;
+ GLboolean bRet;
+ int i;
+
+ R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
+
+ /* copy srcs to presub inputs */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ for(i=0; i<uNumValidSrc; i++)
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, i, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ next_ins(pAsm);
+ }
+ if(uNumValidSrc > 0)
+ {
+ prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ /* browse thro existing presubs. */
+ for(i=0; i<pAsm->unNumPresub; i++)
+ {
+ if(pAsm->presubs[i].sptSigniture == scriptSigniture)
+ {
+ break;
+ }
+ }
+
+ if(i == pAsm->unNumPresub)
+ { /* not loaded yet */
+ /* save assemble context */
+ number_used_registers_save = pAsm->number_used_registers;
+ uFirstHelpReg_save = pAsm->uFirstHelpReg;
+ uHelpReg_save = pAsm->uHelpReg;
+ starting_temp_register_number_save = pAsm->starting_temp_register_number;
+ pILInst_save = pAsm->pILInst;
+ uiCurInst_save = pAsm->uiCurInst;
+
+ /* alloc in presub */
+ if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
+ {
+ pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
+ sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
+ sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
+ if(NULL == pAsm->presubs)
+ {
+ radeon_error("No memeory to allocate built in shader function description structures. \n");
+ return GL_FALSE;
+ }
+ pAsm->unPresubArraySize += 4;
+ }
+
+ pPresubDesc = &(pAsm->presubs[i]);
+ pPresubDesc->sptSigniture = scriptSigniture;
+
+ /* constants offsets need to be final resolved at reloc. */
+ if(0 == pAsm->unNumPresub)
+ {
+ pPresubDesc->unConstantsStart = 0;
+ }
+ else
+ {
+ pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
+ + pAsm->presubs[i-1].pCompiledSub->NumParameters;
+ }
+
+ pPresubDesc->pCompiledSub = pCompiledSub;
+
+ pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
+ pPresubDesc->maxStartReg = uFirstHelpReg_save;
+ pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
+
+ pAsm->unNumPresub++;
+
+ /* setup new assemble context */
+ pAsm->starting_temp_register_number = 0;
+ pAsm->number_used_registers = pCompiledSub->NumTemporaries;
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+ pAsm->uHelpReg = pAsm->uFirstHelpReg;
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+
+
+ pPresubDesc->number_used_registers = pAsm->number_used_registers;
+
+ /* restore assemble context */
+ pAsm->number_used_registers = number_used_registers_save;
+ pAsm->uFirstHelpReg = uFirstHelpReg_save;
+ pAsm->uHelpReg = uHelpReg_save;
+ pAsm->starting_temp_register_number = starting_temp_register_number_save;
+ pAsm->pILInst = pILInst_save;
+ pAsm->uiCurInst = uiCurInst_save;
+ }
+ else
+ { /* was loaded */
+ pPresubDesc = &(pAsm->presubs[i]);
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+ }
+
+ if(GL_FALSE == bRet)
+ {
+ radeon_error("Shader presub assemble failed. \n");
+ }
+ else
+ {
+ /* copy presub output to real dst */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
+ pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
+ pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
+ pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
+ pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
+
+ next_ins(pAsm);
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
+ {
+ pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
+ }
+ if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
+ {
+ pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
+ }
+
+ return bRet;
+}
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
@@ -6938,6 +6589,11 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
FREE(pR700AsmCode->callers);
}
+ if(NULL != pR700AsmCode->presubs)
+ {
+ FREE(pR700AsmCode->presubs);
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 130fc89dae..dbd9860f7d 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -34,6 +34,45 @@
#include "r700_shaderinst.h"
#include "r700_shader.h"
+typedef enum LOADABLE_SCRIPT_SIGNITURE
+{
+ GLSL_NOISE1 = 0x10000001,
+ GLSL_NOISE2 = 0x10000002,
+ GLSL_NOISE3 = 0x10000003,
+ GLSL_NOISE4 = 0x10000004
+}LOADABLE_SCRIPT_SIGNITURE;
+
+typedef struct COMPILED_SUB
+{
+ struct prog_instruction *Instructions;
+ GLuint NumInstructions;
+ GLuint NumTemporaries;
+ GLuint NumParameters;
+ GLuint MinRegIndex;
+ GLfloat (*ParameterValues)[4];
+ GLbyte outputSwizzleX;
+ GLbyte outputSwizzleY;
+ GLbyte outputSwizzleZ;
+ GLbyte outputSwizzleW;
+ GLshort srcRegIndex[3];
+ GLushort dstRegIndex;
+}COMPILED_SUB;
+
+typedef struct PRESUB_DESCtag
+{
+ LOADABLE_SCRIPT_SIGNITURE sptSigniture;
+ GLint subIL_Shift;
+ struct prog_src_register InReg[3];
+ struct prog_dst_register OutReg;
+
+ GLushort maxStartReg;
+ GLushort number_used_registers;
+
+ GLuint unConstantsStart;
+
+ COMPILED_SUB * pCompiledSub;
+} PRESUB_DESC;
+
typedef enum SHADER_PIPE_TYPE
{
SPT_VP = 0,
@@ -114,20 +153,22 @@ typedef struct PVSDSTtag
typedef struct PVSINSTtag
{
- BITS literal :2;
+ BITS literal_slots :2;
BITS SaturateMode :2;
+ BITS index_mode :3;
} PVSINST;
typedef struct PVSSRCtag
{
- BITS rtype:4;
+ BITS rtype:3;
BITS addrmode0:1;
- BITS reg:10; //15 (8)
+ BITS reg:10; //14 (8)
BITS swizzlex:3;
BITS swizzley:3;
BITS swizzlez:3;
- BITS swizzlew:3; //27
+ BITS swizzlew:3; //26
+ BITS abs:1;
BITS negx:1;
BITS negy:1;
BITS negz:1;
@@ -294,6 +335,7 @@ typedef struct SUB_OFFSET
GLint subIL_Offset;
GLuint unCFoffset;
GLuint unStackDepthMax;
+ PRESUB_DESC * pPresubDesc;
TypedShaderList lstCFInstructions_local;
} SUB_OFFSET;
@@ -302,6 +344,9 @@ typedef struct CALLER_POINTER
GLint subIL_Offset;
GLint subDescIndex;
R700ControlFlowGenericClause* cf_ptr;
+
+ R700ControlFlowGenericClause* prelude_cf_ptr;
+ R700ControlFlowGenericClause* finale_cf_ptr;
} CALLER_POINTER;
#define SQ_MAX_CALL_DEPTH 0x00000020
@@ -343,8 +388,10 @@ typedef struct r700_AssemblerBase
PVSDWORD D;
PVSDWORD D2;
PVSDWORD S[3];
+ PVSDWORD C[4];
unsigned int uLastPosUpdate;
+ unsigned int last_cond_register;
OUT_FRAGMENT_FMT_0 fp_stOutFmt0;
@@ -415,6 +462,7 @@ typedef struct r700_AssemblerBase
SHADER_PIPE_TYPE currentShaderType;
struct prog_instruction * pILInst;
GLuint uiCurInst;
+ GLubyte SamplerUnits[MAX_SAMPLERS];
GLboolean bR6xx;
/* helper to decide which type of instruction to assemble */
GLboolean is_tex;
@@ -432,6 +480,13 @@ typedef struct r700_AssemblerBase
GLuint unCFflags;
+ PRESUB_DESC * presubs;
+ GLuint unPresubArraySize;
+ GLuint unNumPresub;
+ GLuint unCurNumILInsts;
+
+ GLuint unVetTexBits;
+
} r700_AssemblerBase;
//Internal use
@@ -453,7 +508,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
GLboolean is_reduction_opcode(PVSDWORD * dest);
GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3);
GLboolean IsTex(gl_inst_opcode Opcode);
GLboolean IsAlu(gl_inst_opcode Opcode);
@@ -526,13 +581,6 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
GLboolean next_ins(r700_AssemblerBase *pAsm);
-GLboolean next_ins2(r700_AssemblerBase *pAsm);
-GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm);
-
-/* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */
-GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral);
-GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral);
-
GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops);
GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset);
GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue);
@@ -546,7 +594,6 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
GLboolean assemble_BAD(char *opcode_str);
GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
-GLboolean assemble_COS(r700_AssemblerBase *pAsm);
GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
GLboolean assemble_DST(r700_AssemblerBase *pAsm);
GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
@@ -567,12 +614,12 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
GLboolean assemble_POW(r700_AssemblerBase *pAsm);
GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
-GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode);
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode);
GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
GLboolean assemble_STP(r700_AssemblerBase *pAsm);
@@ -588,13 +635,15 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
GLboolean assemble_COND(r700_AssemblerBase *pAsm);
GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
-GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex);
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift);
GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
GLboolean assemble_RET(r700_AssemblerBase *pAsm);
GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
GLint nILindex,
+ GLuint uiIL_Offest,
GLuint uiNumberInsts,
- struct prog_instruction *pILInst);
+ struct prog_instruction *pILInst,
+ PRESUB_DESC * pPresubDesc);
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
@@ -605,16 +654,23 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
BITS depth_channel_select);
+GLboolean callPreSub(r700_AssemblerBase* pAsm,
+ LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+ /* struct prog_instruction ** pILInstParent, */
+ COMPILED_SUB * pCompiledSub,
+ GLshort uOutReg,
+ GLshort uNumValidSrc);
//Interface
GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiIL_Shift,
GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode);
GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
-GLboolean RelocProgram(r700_AssemblerBase * pAsm);
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg);
GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 8126777bf4..c124e02184 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ struct r700_vertex_program *vp = context->selected_vp;
+
struct radeon_bo *bo = NULL;
unsigned int i;
BATCH_LOCALS(&context->radeon);
@@ -52,7 +55,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
uint32_t offset;
if (t) {
@@ -71,7 +74,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
- R600_OUT_BATCH(i * 7);
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 7);
+ }
+
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
@@ -95,21 +107,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
}
}
+#define SAMPLER_STRIDE 3
+
static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
unsigned int i;
+
+ struct r700_vertex_program *vp = context->selected_vp;
+
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
if (t) {
BEGIN_BATCH_NO_AUTOSTATE(5);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
- R600_OUT_BATCH(i * 3);
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 3);
+ }
+
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
@@ -1163,7 +1189,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
count += 3;
if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
- for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+ /* targets are enabled in r700SetRenderTarget but state
+ size is calculated before that. Until MRT's are done
+ hardcode target0 as enabled. */
+ count += 3;
+ for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) {
if (r700->render_target[ui].enabled)
count += 3;
}
@@ -1306,9 +1336,9 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(poly, always, 10, r700SendPolyState);
ALLOC_STATE(cb, cb, 18, r700SendCBState);
ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+ ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
- ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
ALLOC_STATE(sx, always, 9, r700SendSXState);
ALLOC_STATE(vgt, always, 41, r700SendVGTState);
ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
index c6546ab00c..526d3843d1 100644
--- a/src/mesa/drivers/dri/r600/r700_clear.c
+++ b/src/mesa/drivers/dri/r600/r700_clear.c
@@ -57,6 +57,10 @@ void r700Clear(GLcontext * ctx, GLbitfield mask)
radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask);
+ if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
+ context->radeon.front_buffer_dirty = GL_TRUE;
+ }
+
if( GL_TRUE == r700ClearFast(context, mask) )
{
return;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index e9ef6c8695..ce2d9fdf79 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -34,6 +34,7 @@
#include "main/imports.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
+#include "shader/program.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
@@ -42,6 +43,54 @@
#include "r700_debug.h"
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
+{
+ static const gl_state_index winstate[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+ struct prog_instruction *newInst, *inst;
+ GLint win_size; /* state reference */
+ GLuint wpos_temp; /* temp register */
+ int i, j;
+
+ /* PARAM win_size = STATE_FB_SIZE */
+ win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
+
+ wpos_temp = fprog->Base.NumTemporaries++;
+
+ /* scan program where WPOS is used and replace with wpos_temp */
+ inst = fprog->Base.Instructions;
+ for (i = 0; i < fprog->Base.NumInstructions; i++) {
+ for (j=0; j < 3; j++) {
+ if(inst->SrcReg[j].File == PROGRAM_INPUT &&
+ inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
+ inst->SrcReg[j].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[j].Index = wpos_temp;
+ }
+ }
+ inst++;
+ }
+
+ _mesa_insert_instructions(&(fprog->Base), 0, 1);
+
+ newInst = fprog->Base.Instructions;
+ /* invert wpos.y
+ * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
+ newInst[0].Opcode = OPCODE_ADD;
+ newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+ newInst[0].DstReg.Index = wpos_temp;
+ newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+
+ newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+ newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ newInst[0].SrcReg[0].Negate = NEGATE_Y;
+
+ newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+ newInst[0].SrcReg[1].Index = win_size;
+ newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+
+}
+
//TODO : Validate FP input with VP output.
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
struct gl_fragment_program *mesa_fp,
@@ -93,7 +142,7 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
}
}
-
+
/* order has been taken care of */
#if 1
for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
@@ -149,6 +198,17 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->number_used_registers += unMaxVarying + 1;
}
#endif
+ unBit = 1 << FRAG_ATTRIB_FACE;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
+ }
/* Map temporary registers (GPRs) */
pAsm->starting_temp_register_number = pAsm->number_used_registers;
@@ -303,10 +363,17 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
GLuint number_of_colors_exported;
GLboolean z_enabled = GL_FALSE;
GLuint unBit;
+ int i;
//Init_Program
Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
- Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
+
+ if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
+ {
+ insert_wpos_code(ctx, mesa_fp);
+ }
+
+ Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
{
@@ -315,7 +382,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
InitShaderProgram(&(fp->r700AsmCode));
+ for(i=0; i < MAX_SAMPLERS; i++)
+ {
+ fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
+ }
+
+ fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
+
if( GL_FALSE == AssembleInstr(0,
+ 0,
mesa_fp->Base.NumInstructions,
&(mesa_fp->Base.Instructions[0]),
&(fp->r700AsmCode)) )
@@ -328,7 +403,7 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
return GL_FALSE;
}
- if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) )
+ if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
{
return GL_FALSE;
}
@@ -398,6 +473,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
unsigned int unNumOfReg;
unsigned int unBit;
GLuint exportCount;
+ GLboolean point_sprite = GL_FALSE;
if(GL_FALSE == fp->loaded)
{
@@ -451,6 +527,50 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
}
+ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
+ {
+ ui += 1;
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+ SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
+ SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
+ }
+ else
+ {
+ CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+ }
+
+ /* see if we need any point_sprite replacements */
+ for (i = VERT_RESULT_TEX0; i<= VERT_RESULT_TEX7; i++)
+ {
+ if(ctx->Point.CoordReplace[i - VERT_RESULT_TEX0] == GL_TRUE)
+ point_sprite = GL_TRUE;
+ }
+
+ if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
+ {
+ /* for FRAG_ATTRIB_PNTC we need to increase num_interp */
+ if(mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
+ {
+ ui++;
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ }
+ SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
+ if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+ SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ else
+ CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ }
+ else
+ {
+ CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ }
+
+
ui = (unNumOfReg < ui) ? ui : unNumOfReg;
SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
@@ -470,6 +590,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
struct r700_vertex_program_cont *vpc =
(struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
+ for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+ r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
+
unBit = 1 << FRAG_ATTRIB_WPOS;
if(mesa_fp->Base.InputsRead & unBit)
{
@@ -532,9 +656,43 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
SEMANTIC_shift, SEMANTIC_mask);
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ /* ARB_point_sprite */
+ if(ctx->Point.CoordReplace[i] == GL_TRUE)
+ {
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
}
}
+ unBit = 1 << FRAG_ATTRIB_FACE;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
+
+
+
+
for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
{
unBit = 1 << i;
@@ -583,6 +741,25 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
} else
r700->ps.num_consts = 0;
+ COMPILED_SUB * pCompiledSub;
+ GLuint uj;
+ GLuint unConstOffset = r700->ps.num_consts;
+ for(ui=0; ui<pAsm->unNumPresub; ui++)
+ {
+ pCompiledSub = pAsm->presubs[ui].pCompiledSub;
+
+ r700->ps.num_consts += pCompiledSub->NumParameters;
+
+ for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+ {
+ r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+ r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+ r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+ r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+ }
+ unConstOffset += pCompiledSub->NumParameters;
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h
index e562bfa478..39c59c9201 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.h
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.h
@@ -48,6 +48,8 @@ struct r700_fragment_program
};
/* Internal */
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog);
+
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
struct gl_fragment_program *mesa_fp,
GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h
index 2829cca0a3..cdb9a570f7 100644
--- a/src/mesa/drivers/dri/r600/r700_shaderinst.h
+++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h
@@ -42,6 +42,13 @@
#define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0
#define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0
+//richard dec.10 glsl
+#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000
+#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012
+#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012
+#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012
+//-------------------
+
#define SHADERINST_TYPEMASK_CF 0x10
#define SHADERINST_TYPEMASK_ALU 0x20
#define SHADERINST_TYPEMASK_TEX 0x40
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index d7420678ff..16b05d5cd9 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -705,6 +705,10 @@ static void r700UpdateCulling(GLcontext * ctx)
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
break;
}
+
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ r700->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit;
}
static void r700UpdateLineStipple(GLcontext * ctx)
@@ -1227,13 +1231,8 @@ static void r700UpdatePolygonMode(GLcontext * ctx)
/* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
* correctly by selecting the correct front and back face
*/
- if (ctx->Polygon.FrontFace == GL_CCW) {
- f = ctx->Polygon.FrontMode;
- b = ctx->Polygon.BackMode;
- } else {
- f = ctx->Polygon.BackMode;
- b = ctx->Polygon.FrontMode;
- }
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
/* Enable polygon mode */
SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index d3d1da7959..90fac078ff 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -337,7 +337,15 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
InitShaderProgram(&(vp->r700AsmCode));
+ for(i=0; i < MAX_SAMPLERS; i++)
+ {
+ vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
+ }
+
+ vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
+
if(GL_FALSE == AssembleInstr(0,
+ 0,
vp->mesa_program->Base.NumInstructions,
&(vp->mesa_program->Base.Instructions[0]),
&(vp->r700AsmCode)) )
@@ -350,7 +358,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
return NULL;
}
- if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) )
+ if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
{
return GL_FALSE;
}
@@ -667,5 +675,24 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
} else
r700->vs.num_consts = 0;
+ COMPILED_SUB * pCompiledSub;
+ GLuint uj;
+ GLuint unConstOffset = r700->vs.num_consts;
+ for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
+ {
+ pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
+
+ r700->vs.num_consts += pCompiledSub->NumParameters;
+
+ for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+ {
+ r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+ r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+ r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+ r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+ }
+ unConstOffset += pCompiledSub->NumParameters;
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/radeon_bo.c b/src/mesa/drivers/dri/r600/radeon_bo.c
new file mode 120000
index 0000000000..9448ffee54
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo.c
@@ -0,0 +1 @@
+../radeon/radeon_bo.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h
new file mode 120000
index 0000000000..029450928b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_int_drm.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs.c b/src/mesa/drivers/dri/r600/radeon_cs.c
new file mode 120000
index 0000000000..66b7ad1eb0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs.c
@@ -0,0 +1 @@
+../radeon/radeon_cs.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h
new file mode 120000
index 0000000000..462f5245d0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_int_drm.h \ No newline at end of file