summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r600
diff options
context:
space:
mode:
authorJosé Fonseca <jfonseca@vmware.com>2010-01-08 15:42:57 +0000
committerJosé Fonseca <jfonseca@vmware.com>2010-01-08 15:42:57 +0000
commit080c40ab32b2abd6d8381b4a0cc143d36a1652b2 (patch)
treee173767ebc5a82d81b9fc086449d915e29348976 /src/mesa/drivers/dri/r600
parent9cdf6f025b2ed55cfb13dd09f870f01d0c7947d3 (diff)
parenta1de400e8de06a80ab140bb0fa950e990607572d (diff)
Merge remote branch 'origin/master' into lp-binning
Conflicts: src/gallium/auxiliary/util/u_surface.c src/gallium/drivers/llvmpipe/Makefile src/gallium/drivers/llvmpipe/SConscript src/gallium/drivers/llvmpipe/lp_bld_arit.c src/gallium/drivers/llvmpipe/lp_bld_flow.c src/gallium/drivers/llvmpipe/lp_bld_interp.c src/gallium/drivers/llvmpipe/lp_clear.c src/gallium/drivers/llvmpipe/lp_context.c src/gallium/drivers/llvmpipe/lp_context.h src/gallium/drivers/llvmpipe/lp_draw_arrays.c src/gallium/drivers/llvmpipe/lp_jit.c src/gallium/drivers/llvmpipe/lp_jit.h src/gallium/drivers/llvmpipe/lp_prim_vbuf.c src/gallium/drivers/llvmpipe/lp_setup.c src/gallium/drivers/llvmpipe/lp_setup_point.c src/gallium/drivers/llvmpipe/lp_state.h src/gallium/drivers/llvmpipe/lp_state_blend.c src/gallium/drivers/llvmpipe/lp_state_derived.c src/gallium/drivers/llvmpipe/lp_state_fs.c src/gallium/drivers/llvmpipe/lp_state_sampler.c src/gallium/drivers/llvmpipe/lp_state_surface.c src/gallium/drivers/llvmpipe/lp_tex_cache.c src/gallium/drivers/llvmpipe/lp_tex_cache.h src/gallium/drivers/llvmpipe/lp_tex_sample.h src/gallium/drivers/llvmpipe/lp_tile_cache.c
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r--src/mesa/drivers/dri/r600/Makefile3
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c234
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h16
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c293
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.h33
-rw-r--r--src/mesa/drivers/dri/r600/r600_reg_r6xx.h6
-rw-r--r--src/mesa/drivers/dri/r600/r600_reg_r7xx.h2
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c28
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c138
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c2780
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h203
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c335
-rw-r--r--src/mesa/drivers/dri/r600/r700_clear.c8
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c414
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.h8
-rw-r--r--src/mesa/drivers/dri/r600/r700_oglprog.c55
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c461
-rw-r--r--src/mesa/drivers/dri/r600/r700_shader.c9
-rw-r--r--src/mesa/drivers/dri/r600/r700_shader.h4
-rw-r--r--src/mesa/drivers/dri/r600/r700_shaderinst.h7
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.c165
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.h2
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c185
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.h9
l---------src/mesa/drivers/dri/r600/radeon_bo.c1
l---------src/mesa/drivers/dri/r600/radeon_bo_int_drm.h1
l---------src/mesa/drivers/dri/r600/radeon_cs.c1
l---------src/mesa/drivers/dri/r600/radeon_cs_int_drm.h1
28 files changed, 4051 insertions, 1351 deletions
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 7d5a7b1ab6..26f47b7268 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -14,7 +14,7 @@ EGL_SOURCES = server/radeon_egl.c
endif
ifeq ($(RADEON_LDFLAGS),)
-CS_SOURCES = radeon_cs_space_drm.c
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
endif
COMMON_SOURCES = \
@@ -76,4 +76,3 @@ DRI_LIB_DEPS += $(RADEON_LDFLAGS)
include ../Makefile.template
-symlinks:
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index 3cfe03a45f..370bb04f93 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -52,29 +52,49 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_mipmap_tree.h"
#include "radeon_reg.h"
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
+struct r600_cs_manager_legacy
+{
+ struct radeon_cs_manager base;
+ struct radeon_context *ctx;
+ /* hack for scratch stuff */
+ uint32_t pending_age;
+ uint32_t pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+ struct radeon_cs_reloc base;
+ uint32_t cindices;
+ uint32_t *indices;
+ uint32_t *reloc_indices;
+};
-static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
- uint32_t ndw)
+static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm,
+ uint32_t ndw)
{
- struct radeon_cs *cs;
+ struct radeon_cs_int *csi;
- cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
- if (cs == NULL) {
+ csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
+ if (csi == NULL) {
return NULL;
}
- cs->csm = csm;
- cs->ndw = (ndw + 0x3FF) & (~0x3FF);
- cs->packets = (uint32_t*)malloc(4*cs->ndw);
- if (cs->packets == NULL) {
- free(cs);
+ csi->csm = csm;
+ csi->ndw = (ndw + 0x3FF) & (~0x3FF);
+ csi->packets = (uint32_t*)malloc(4*csi->ndw);
+ if (csi->packets == NULL) {
+ free(csi);
return NULL;
}
- cs->relocs_total_size = 0;
- return cs;
+ csi->relocs_total_size = 0;
+ return csi;
}
-static int r600_cs_write_reloc(struct radeon_cs *cs,
+static int r600_cs_write_reloc(struct radeon_cs_int *csi,
struct radeon_bo *bo,
uint32_t read_domain,
uint32_t write_domain,
@@ -83,7 +103,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
struct r600_cs_reloc_legacy *relocs;
int i;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
/* check domains */
if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
/* in one CS a bo can only be in read or write domain but not
@@ -98,7 +118,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
return -EINVAL;
}
/* check if bo is already referenced */
- for(i = 0; i < cs->crelocs; i++) {
+ for(i = 0; i < csi->crelocs; i++) {
uint32_t *indices;
uint32_t *reloc_indices;
@@ -129,109 +149,108 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
}
relocs[i].indices = indices;
relocs[i].reloc_indices = reloc_indices;
- relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
- relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
- cs->section_cdw += 2;
- cs->cdw += 2;
+ relocs[i].indices[relocs[i].cindices - 1] = csi->cdw;
+ relocs[i].reloc_indices[relocs[i].cindices - 1] = csi->cdw;
+ csi->section_cdw += 2;
+ csi->cdw += 2;
return 0;
}
}
/* add bo to reloc */
relocs = (struct r600_cs_reloc_legacy*)
- realloc(cs->relocs,
- sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
+ realloc(csi->relocs,
+ sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1));
if (relocs == NULL) {
return -ENOMEM;
}
- cs->relocs = relocs;
- relocs[cs->crelocs].base.bo = bo;
- relocs[cs->crelocs].base.read_domain = read_domain;
- relocs[cs->crelocs].base.write_domain = write_domain;
- relocs[cs->crelocs].base.flags = flags;
- relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
- relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
- if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
+ csi->relocs = relocs;
+ relocs[csi->crelocs].base.bo = bo;
+ relocs[csi->crelocs].base.read_domain = read_domain;
+ relocs[csi->crelocs].base.write_domain = write_domain;
+ relocs[csi->crelocs].base.flags = flags;
+ relocs[csi->crelocs].indices = (uint32_t*)malloc(4);
+ relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4);
+ if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) )
{
return -ENOMEM;
}
- relocs[cs->crelocs].indices[0] = cs->cdw;
- relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
- cs->section_cdw += 2;
- cs->cdw += 2;
- relocs[cs->crelocs].cindices = 1;
- cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
- cs->crelocs++;
+ relocs[csi->crelocs].indices[0] = csi->cdw;
+ relocs[csi->crelocs].reloc_indices[0] = csi->cdw;
+ csi->section_cdw += 2;
+ csi->cdw += 2;
+ relocs[csi->crelocs].cindices = 1;
+ csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+ csi->crelocs++;
radeon_bo_ref(bo);
return 0;
}
-static int r600_cs_begin(struct radeon_cs *cs,
+static int r600_cs_begin(struct radeon_cs_int *csi,
uint32_t ndw,
const char *file,
const char *func,
int line)
{
- if (cs->section) {
+ if (csi->section_ndw) {
fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
- cs->section_file, cs->section_func, cs->section_line);
+ csi->section_file, csi->section_func, csi->section_line);
fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 1;
- cs->section_ndw = ndw;
- cs->section_cdw = 0;
- cs->section_file = file;
- cs->section_func = func;
- cs->section_line = line;
+ csi->section_ndw = ndw;
+ csi->section_cdw = 0;
+ csi->section_file = file;
+ csi->section_func = func;
+ csi->section_line = line;
- if (cs->cdw + ndw > cs->ndw) {
+ if (csi->cdw + ndw > csi->ndw) {
uint32_t tmp, *ptr;
int num = (ndw > 0x400) ? ndw : 0x400;
- tmp = (cs->cdw + num + 0x3FF) & (~0x3FF);
- ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+ tmp = (csi->cdw + num + 0x3FF) & (~0x3FF);
+ ptr = (uint32_t*)realloc(csi->packets, 4 * tmp);
if (ptr == NULL) {
return -ENOMEM;
}
- cs->packets = ptr;
- cs->ndw = tmp;
+ csi->packets = ptr;
+ csi->ndw = tmp;
}
return 0;
}
-static int r600_cs_end(struct radeon_cs *cs,
+static int r600_cs_end(struct radeon_cs_int *csi,
const char *file,
const char *func,
int line)
{
- if (!cs->section) {
+ if (!csi->section_ndw) {
fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
- cs->section = 0;
- if ( cs->section_ndw != cs->section_cdw ) {
+ if ( csi->section_ndw != csi->section_cdw ) {
fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
- cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
- fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
- cs->section_ndw, cs->cdw, cs->section_cdw);
+ csi->section_file, csi->section_func, csi->section_line, csi->section_ndw, csi->section_cdw);
+ fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n",
+ csi->section_ndw, csi->cdw, csi->section_cdw);
fprintf(stderr, "CS section end at (%s,%s,%d)\n",
file, func, line);
return -EPIPE;
}
+ csi->section_ndw = 0;
- if (cs->cdw > cs->ndw) {
+ if (csi->cdw > csi->ndw) {
fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
- cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw);
+ csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw);
fprintf(stderr, "CS section end at (%s,%s,%d)\n",
file, func, line);
assert(0);
@@ -240,21 +259,21 @@ static int r600_cs_end(struct radeon_cs *cs,
return 0;
}
-static int r600_cs_process_relocs(struct radeon_cs *cs,
+static int r600_cs_process_relocs(struct radeon_cs_int *csi,
uint32_t * reloc_chunk,
uint32_t * length_dw_reloc_chunk)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i, j, r;
uint32_t offset_dw = 0;
- csm = (struct r600_cs_manager_legacy*)cs->csm;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+ csm = (struct r600_cs_manager_legacy*)csi->csm;
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
restart:
- for (i = 0; i < cs->crelocs; i++) {
- uint32_t soffset, eoffset, asicoffset;
+ for (i = 0; i < csi->crelocs; i++) {
+ uint32_t soffset, eoffset;
r = radeon_bo_legacy_validate(relocs[i].base.bo,
&soffset, &eoffset);
@@ -262,32 +281,20 @@ restart:
goto restart;
}
if (r) {
- fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+ fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n",
relocs[i].base.bo, soffset, eoffset);
return r;
}
- asicoffset = soffset;
for (j = 0; j < relocs[i].cindices; j++) {
- if (asicoffset >= eoffset) {
- /* radeon_bo_debug(relocs[i].base.bo, 12); */
- fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
- relocs[i].base.bo, soffset, eoffset);
- fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
- relocs[i].base.bo,
- cs->packets[relocs[i].indices[j]],
- eoffset);
- exit(0);
- return -EINVAL;
- }
/* pkt3 nop header in ib chunk */
- cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
+ csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
/* reloc index in ib chunk */
- cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
+ csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
}
/* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
- reloc_chunk[offset_dw] = asicoffset;
+ reloc_chunk[offset_dw] = soffset;
reloc_chunk[offset_dw + 3] = 0;
offset_dw += 4;
@@ -298,14 +305,14 @@ restart:
return 0;
}
-static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
+static int r600_cs_set_age(struct radeon_cs_int *csi) /* -------------- */
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct r600_cs_reloc_legacy *relocs;
int i;
- relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
- for (i = 0; i < cs->crelocs; i++) {
+ relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
+ for (i = 0; i < csi->crelocs; i++) {
radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
radeon_bo_unref(relocs[i].base.bo);
}
@@ -313,21 +320,21 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
}
#if 0
-static void dump_cmdbuf(struct radeon_cs *cs)
+static void dump_cmdbuf(struct radeon_cs_int *csi)
{
int i;
fprintf(stderr,"--start--\n");
- for (i = 0; i < cs->cdw; i++){
- fprintf(stderr,"0x%08x\n", cs->packets[i]);
+ for (i = 0; i < csi->cdw; i++){
+ fprintf(stderr,"0x%08x\n", csi->packets[i]);
}
fprintf(stderr,"--end--\n");
}
#endif
-static int r600_cs_emit(struct radeon_cs *cs)
+static int r600_cs_emit(struct radeon_cs_int *csi)
{
- struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+ struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
struct drm_radeon_cs cs_cmd;
struct drm_radeon_cs_chunk cs_chunk[2];
uint32_t length_dw_reloc_chunk;
@@ -341,9 +348,9 @@ static int r600_cs_emit(struct radeon_cs *cs)
csm->pending_count = 1;
- reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4);
+ reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4);
- r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk);
+ r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk);
if (r) {
free(reloc_chunk);
return 0;
@@ -351,8 +358,8 @@ static int r600_cs_emit(struct radeon_cs *cs)
/* raw ib chunk */
cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
- cs_chunk[0].length_dw = cs->cdw;
- cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
+ cs_chunk[0].length_dw = csi->cdw;
+ cs_chunk[0].chunk_data = (unsigned long)(csi->packets);
/* reloc chaunk */
cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
@@ -370,7 +377,7 @@ static int r600_cs_emit(struct radeon_cs *cs)
do
{
- r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
+ r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
retry++;
} while (r == -EAGAIN && retry < 1000);
@@ -381,11 +388,11 @@ static int r600_cs_emit(struct radeon_cs *cs)
csm->pending_age = cs_cmd.cs_id;
- r600_cs_set_age(cs);
+ r600_cs_set_age(csi);
- cs->csm->read_used = 0;
- cs->csm->vram_write_used = 0;
- cs->csm->gart_write_used = 0;
+ csi->csm->read_used = 0;
+ csi->csm->vram_write_used = 0;
+ csi->csm->gart_write_used = 0;
free(reloc_chunk);
@@ -405,35 +412,34 @@ static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
}
}
-static int r600_cs_destroy(struct radeon_cs *cs)
+static int r600_cs_destroy(struct radeon_cs_int *csi)
{
- r600_cs_free_reloc(cs->relocs, cs->crelocs);
- free(cs->relocs);
- free(cs->packets);
- free(cs);
+ r600_cs_free_reloc(csi->relocs, csi->crelocs);
+ free(csi->relocs);
+ free(csi->packets);
+ free(csi);
return 0;
}
-static int r600_cs_erase(struct radeon_cs *cs)
+static int r600_cs_erase(struct radeon_cs_int *csi)
{
- r600_cs_free_reloc(cs->relocs, cs->crelocs);
- free(cs->relocs);
- cs->relocs_total_size = 0;
- cs->relocs = NULL;
- cs->crelocs = 0;
- cs->cdw = 0;
- cs->section = 0;
+ r600_cs_free_reloc(csi->relocs, csi->crelocs);
+ free(csi->relocs);
+ csi->relocs_total_size = 0;
+ csi->relocs = NULL;
+ csi->crelocs = 0;
+ csi->cdw = 0;
return 0;
}
-static int r600_cs_need_flush(struct radeon_cs *cs)
+static int r600_cs_need_flush(struct radeon_cs_int *csi)
{
/* this function used to flush when the BO usage got to
* a certain size, now the higher levels handle this better */
return 0;
}
-static void r600_cs_print(struct radeon_cs *cs, FILE *file)
+static void r600_cs_print(struct radeon_cs_int *csi, FILE *file)
{
}
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
index eba43d37b6..dff0009699 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_IT_SET_CTL_CONST 0x00006F00
#define R600_IT_SURFACE_BASE_UPDATE 0x00007300
-struct r600_cs_manager_legacy
-{
- struct radeon_cs_manager base;
- struct radeon_context *ctx;
- /* hack for scratch stuff */
- uint32_t pending_age;
- uint32_t pending_count;
-};
-
-struct r600_cs_reloc_legacy {
- struct radeon_cs_reloc base;
- uint32_t cindices;
- uint32_t *indices;
- uint32_t *reloc_indices;
-};
-
struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
/**
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 969144ba12..cb549497f5 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r600_cmdbuf.h"
#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
+#include "radeon_queryobj.h"
#include "r700_state.h"
#include "r700_ioctl.h"
@@ -73,11 +74,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
-/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
-int future_hw_tcl_on = 1;
-int hw_tcl_on = 1;
+#define R600_ENABLE_GLSL_TEST 1
#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
#define need_GL_EXT_blend_equation_separate
@@ -92,14 +92,15 @@ int hw_tcl_on = 1;
#define need_GL_ATI_separate_stencil
#define need_GL_NV_vertex_program
-#include "extension_helper.h"
+#include "main/remap_helper.h"
-extern const struct tnl_pipeline_stage *r700_pipeline[];
-
-const struct dri_extension card_extensions[] = {
+static const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
+ {"GL_ARB_depth_clamp", NULL},
{"GL_ARB_depth_texture", NULL},
{"GL_ARB_fragment_program", NULL},
+ {"GL_ARB_fragment_program_shadow", NULL},
+ {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
{"GL_ARB_multitexture", NULL},
{"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
{"GL_ARB_shadow", NULL},
@@ -111,6 +112,7 @@ const struct dri_extension card_extensions[] = {
{"GL_ARB_texture_env_crossbar", NULL},
{"GL_ARB_texture_env_dot3", NULL},
{"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_texture_non_power_of_two", NULL},
{"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
{"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
{"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
@@ -131,6 +133,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_texture_lod_bias", NULL},
{"GL_EXT_texture_mirror_clamp", NULL},
{"GL_EXT_texture_rectangle", NULL},
+ {"GL_EXT_vertex_array_bgra", NULL},
{"GL_EXT_texture_sRGB", NULL},
{"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
{"GL_ATI_texture_env_combine3", NULL},
@@ -146,7 +149,7 @@ const struct dri_extension card_extensions[] = {
};
-const struct dri_extension mm_extensions[] = {
+static const struct dri_extension mm_extensions[] = {
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
{ NULL, NULL }
};
@@ -155,21 +158,29 @@ const struct dri_extension mm_extensions[] = {
* The GL 2.0 functions are needed to make display lists work with
* functions added by GL_ATI_separate_stencil.
*/
-const struct dri_extension gl_20_extension[] = {
+static const struct dri_extension gl_20_extension[] = {
+#ifdef R600_ENABLE_GLSL_TEST
+ {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+#else
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
+#endif /* R600_ENABLE_GLSL_TEST */
+ {NULL, NULL}
};
-
-static void r600RunPipeline(GLcontext * ctx)
-{
- _mesa_lock_context_textures(ctx);
-
- if (ctx->NewState)
- _mesa_update_state_locked(ctx);
-
- _tnl_run_pipeline(ctx);
- _mesa_unlock_context_textures(ctx);
-}
+static const struct tnl_pipeline_stage *r600_pipeline[] = {
+ /* Catch any t&l fallbacks
+ */
+ &_tnl_vertex_transform_stage,
+ &_tnl_normal_transform_stage,
+ &_tnl_lighting_stage,
+ &_tnl_fog_coordinate_stage,
+ &_tnl_texgen_stage,
+ &_tnl_texture_transform_stage,
+ &_tnl_point_attenuation_stage,
+ &_tnl_vertex_program_stage,
+ &_tnl_render_stage,
+ 0,
+};
static void r600_get_lock(radeonContextPtr rmesa)
{
@@ -180,7 +191,7 @@ static void r600_get_lock(radeonContextPtr rmesa)
if (!rmesa->radeonScreen->kernel_mm)
radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
}
-}
+}
static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
{
@@ -202,6 +213,24 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
context->radeon.Fallback &= ~bit;
}
+static void r600_emit_query_finish(radeonContextPtr radeon)
+{
+ context_t *context = (context_t*) radeon;
+ BATCH_LOCALS(&context->radeon);
+
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+ R600_OUT_BATCH(ZPASS_DONE);
+ R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */
+ R600_OUT_BATCH(0x00000000);
+ R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
static void r600_init_vtbl(radeonContextPtr radeon)
{
radeon->vtbl.get_lock = r600_get_lock;
@@ -210,71 +239,12 @@ static void r600_init_vtbl(radeonContextPtr radeon)
radeon->vtbl.swtcl_flush = NULL;
radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
radeon->vtbl.fallback = r600_fallback;
+ radeon->vtbl.emit_query_finish = r600_emit_query_finish;
}
-/* Create the device specific rendering context.
- */
-GLboolean r600CreateContext(const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
- void *sharedContextPrivate)
+static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
{
- __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
- radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
- struct dd_function_table functions;
- context_t *r600;
- GLcontext *ctx;
-
- assert(glVisual);
- assert(driContextPriv);
- assert(screen);
-
- /* Allocate the R600 context */
- r600 = (context_t*) CALLOC(sizeof(*r600));
- if (!r600) {
- radeon_error("Failed to allocate memory for context.\n");
- return GL_FALSE;
- }
-
- if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- hw_tcl_on = future_hw_tcl_on = 0;
-
- r600_init_vtbl(&r600->radeon);
- /* Parse configuration files.
- * Do this here so that initialMaxAnisotropy is set before we create
- * the default textures.
- */
- driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache,
- screen->driScreen->myNum, "r600");
-
- r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache,
- "def_max_anisotropy");
-
- /* Init default driver functions then plug in our R600-specific functions
- * (the texture functions are especially important)
- */
- _mesa_init_driver_functions(&functions);
-
- r700InitStateFuncs(&functions);
- r600InitTextureFuncs(&functions);
- r700InitShaderFuncs(&functions);
- r700InitIoctlFuncs(&functions);
- radeonInitBufferObjectFuncs(&functions);
-
- if (!radeonInitContext(&r600->radeon, &functions,
- glVisual, driContextPriv,
- sharedContextPrivate)) {
- radeon_error("Initializing context failed.\n");
- FREE(r600);
- return GL_FALSE;
- }
-
- /* Init r600 context data */
- /* Set the maximum texture size small enough that we can guarentee that
- * all texture units can bind a maximal texture and have them both in
- * texturable memory at once.
- */
-
- ctx = r600->radeon.glCtx;
+ context_t *r600 = R700_CONTEXT(ctx);
ctx->Const.MaxTextureImageUnits =
driQueryOptioni(&r600->radeon.optionCache, "texture_image_units");
@@ -299,38 +269,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
ctx->Const.MaxLineWidth = 0xffff / 8.0;
ctx->Const.MaxLineWidthAA = 0xffff / 8.0;
- /* Needs further modifications */
-#if 0
- ctx->Const.MaxArrayLockSize =
- ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
-#endif
-
- ctx->Const.MaxDrawBuffers = 1;
-
- /* Initialize the software rasterizer and helper modules.
- */
- _swrast_CreateContext(ctx);
- _vbo_CreateContext(ctx);
- _tnl_CreateContext(ctx);
- _swsetup_CreateContext(ctx);
- _swsetup_Wakeup(ctx);
- _ae_create_context(ctx);
-
- /* Install the customized pipeline:
- */
- _tnl_destroy_pipeline(ctx);
- _tnl_install_pipeline(ctx, r700_pipeline);
-
- /* Try and keep materials and vertices separate:
- */
-/* _tnl_isolate_materials(ctx, GL_TRUE); */
-
- /* Configure swrast and TNL to match hardware characteristics:
- */
- _swrast_allow_pixel_fog(ctx, GL_FALSE);
- _swrast_allow_vertex_fog(ctx, GL_TRUE);
- _tnl_allow_pixel_fog(ctx, GL_FALSE);
- _tnl_allow_vertex_fog(ctx, GL_TRUE);
+ ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */
/* 256 for reg-based consts, inline consts also supported */
ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */
@@ -354,15 +293,38 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
ctx->Const.FragmentProgram.MaxNativeInstructions = 8192;
ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
- ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
- ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+}
- radeon_init_debug();
+static void r600ParseOptions(context_t *r600, radeonScreenPtr screen)
+{
+ /* Parse configuration files.
+ * Do this here so that initialMaxAnisotropy is set before we create
+ * the default textures.
+ */
+ driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r600");
+
+ r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache,
+ "def_max_anisotropy");
+
+}
+
+static void r600InitGLExtensions(GLcontext *ctx)
+{
+ context_t *r600 = R700_CONTEXT(ctx);
driInitExtensions(ctx, card_extensions, GL_TRUE);
if (r600->radeon.radeonScreen->kernel_mm)
driInitExtensions(ctx, mm_extensions, GL_FALSE);
+#ifdef R600_ENABLE_GLSL_TEST
+ driInitExtensions(ctx, gl_20_extension, GL_TRUE);
+ _mesa_enable_2_0_extensions(ctx);
+
+ /* glsl compiler has problem if this is not GL_TRUE */
+ ctx->Shader.EmitCondCodes = GL_TRUE;
+#endif /* R600_ENABLE_GLSL_TEST */
+
if (driQueryOptionb
(&r600->radeon.optionCache, "disable_stencil_two_side"))
_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
@@ -377,21 +339,100 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
- r700InitDraw(ctx);
+ /* XXX: RV740 only seems to report results from half of its DBs */
+ if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740)
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+}
- radeon_fbo_init(&r600->radeon);
- radeonInitSpanFuncs( ctx );
+/* Create the device specific rendering context.
+ */
+GLboolean r600CreateContext(const __GLcontextModes * glVisual,
+ __DRIcontext * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ struct dd_function_table functions;
+ context_t *r600;
+ GLcontext *ctx;
- r600InitCmdBuf(r600);
+ assert(glVisual);
+ assert(driContextPriv);
+ assert(screen);
- r700InitState(r600->radeon.glCtx);
+ /* Allocate the R600 context */
+ r600 = (context_t*) CALLOC(sizeof(*r600));
+ if (!r600) {
+ radeon_error("Failed to allocate memory for context.\n");
+ return GL_FALSE;
+ }
+
+ r600ParseOptions(r600, screen);
+
+ r600->radeon.radeonScreen = screen;
+ r600_init_vtbl(&r600->radeon);
+
+ /* Init default driver functions then plug in our R600-specific functions
+ * (the texture functions are especially important)
+ */
+ _mesa_init_driver_functions(&functions);
- TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline;
+ r700InitStateFuncs(&functions);
+ r600InitTextureFuncs(&functions);
+ r700InitShaderFuncs(&functions);
+ radeonInitQueryObjFunctions(&functions);
+ r700InitIoctlFuncs(&functions);
+ radeonInitBufferObjectFuncs(&functions);
- if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) {
- radeon_warning("disabling 3D acceleration\n");
+ if (!radeonInitContext(&r600->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ radeon_error("Initializing context failed.\n");
+ FREE(r600);
+ return GL_FALSE;
}
+ ctx = r600->radeon.glCtx;
+
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+ r600InitConstValues(ctx, screen);
+
+ _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext(ctx);
+ _vbo_CreateContext(ctx);
+ _tnl_CreateContext(ctx);
+ _swsetup_CreateContext(ctx);
+ _swsetup_Wakeup(ctx);
+
+ /* Install the customized pipeline:
+ */
+ _tnl_destroy_pipeline(ctx);
+ _tnl_install_pipeline(ctx, r600_pipeline);
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+ /* Configure swrast and TNL to match hardware characteristics:
+ */
+ _swrast_allow_pixel_fog(ctx, GL_FALSE);
+ _swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _tnl_allow_pixel_fog(ctx, GL_FALSE);
+ _tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+ radeon_init_debug();
+
+ r700InitDraw(ctx);
+
+ radeon_fbo_init(&r600->radeon);
+ radeonInitSpanFuncs( ctx );
+ r600InitCmdBuf(r600);
+ r700InitState(r600->radeon.glCtx);
+
+ r600InitGLExtensions(ctx);
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h
index 7f68820fda..a1b4af715e 100644
--- a/src/mesa/drivers/dri/r600/r600_context.h
+++ b/src/mesa/drivers/dri/r600/r600_context.h
@@ -58,29 +58,6 @@ typedef struct r600_context context_t;
#include "main/mm.h"
-/************ DMA BUFFERS **************/
-
-/* The blit width for texture uploads
- */
-#define R600_BLIT_WIDTH_BYTES 1024
-#define R600_MAX_TEXTURE_UNITS 8
-
-struct r600_texture_state {
- int tc_count; /* number of incoming texture coordinates from VAP */
-};
-
-/* Perhaps more if we store programs in vmem? */
-/* drm_r600_cmd_header_t->vpu->count is unsigned char */
-#define VSF_MAX_FRAGMENT_LENGTH (255*4)
-
-/* Can be tested with colormat currently. */
-#define VSF_MAX_FRAGMENT_TEMPS (14)
-
-#define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
-#define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
-
-extern int hw_tcl_on;
-
#define COLOR_IS_RGBA
#define TAG(x) r600##x
#include "tnl_dd/t_dd_vertex.h"
@@ -131,6 +108,7 @@ typedef struct StreamDesc
GLint size; //number of data element
GLenum type; //data element type
GLsizei stride;
+ GLenum format; // GL_RGBA,GLBGRA
struct radeon_bo *bo;
GLint bo_offset;
@@ -167,9 +145,6 @@ struct r600_context {
/* Vertex buffers
*/
- GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
-
GLint nNumActiveAos;
StreamDesc stream_desc[VERT_ATTRIB_MAX];
struct r700_index_buffer ind_buf;
@@ -179,7 +154,7 @@ struct r600_context {
#define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx))
extern GLboolean r600CreateContext(const __GLcontextModes * glVisual,
- __DRIcontextPrivate * driContextPriv,
+ __DRIcontext * driContextPriv,
void *sharedContextPrivate);
#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw))
@@ -203,7 +178,6 @@ extern GLboolean r700SyncSurf(context_t *context,
uint32_t write_domain,
uint32_t sync_type);
-extern void r700SetupStreams(GLcontext * ctx);
extern void r700Start3D(context_t *context);
extern void r600InitAtoms(context_t *context);
extern void r700InitDraw(GLcontext *ctx);
@@ -213,7 +187,4 @@ extern void r700InitDraw(GLcontext *ctx);
#define RADEON_D_PLAYBACK_RAW 2
#define RADEON_D_T 3
-#define r600PackFloat32 radeonPackFloat32
-#define r600PackFloat24 radeonPackFloat24
-
#endif /* __R600_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h
index f7702c46de..74af7b4fed 100644
--- a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h
+++ b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h
@@ -415,11 +415,11 @@ enum {
ALPHA_TO_MASK_ENABLE = 1 << 0,
ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8,
ALPHA_TO_MASK_OFFSET0_shift = 8,
- ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10,
ALPHA_TO_MASK_OFFSET1_shift = 10,
- ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12,
ALPHA_TO_MASK_OFFSET2_shift = 12,
- ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8,
+ ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14,
ALPHA_TO_MASK_OFFSET3_shift = 14,
// SQ_VTX_CONSTANT_WORD2_0 = 0x00038008,
diff --git a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h
index e5c01c861a..eb169bd885 100644
--- a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h
+++ b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h
@@ -143,6 +143,8 @@ enum {
// SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c,
R7xx_TRUNCATE_COORD_bit = 1 << 9,
R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10,
+// DB_RENDER_CONTROL = 0x00028d0c,
+ PERFECT_ZPASS_COUNTS_bit = 1 << 15,
} ;
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index d105b90cd1..f745fe3e8a 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/image.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
-#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
@@ -286,6 +285,7 @@ static void r600TexParameter(GLcontext * ctx, GLenum target,
GLenum pname, const GLfloat * params)
{
radeonTexObj* t = radeon_tex_obj(texObj);
+ GLenum baseFormat;
radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE,
"%s( %s )\n", __FUNCTION__,
@@ -305,30 +305,22 @@ static void r600TexParameter(GLcontext * ctx, GLenum target,
break;
case GL_TEXTURE_BORDER_COLOR:
- r600SetTexBorderColor(t, texObj->BorderColor);
+ r600SetTexBorderColor(t, texObj->BorderColor.f);
break;
case GL_TEXTURE_BASE_LEVEL:
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
- /* This isn't the most efficient solution but there doesn't appear to
- * be a nice alternative. Since there's no LOD clamping,
- * we just have to rely on loading the right subset of mipmap levels
- * to simulate a clamped LOD.
- */
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- t->validated = GL_FALSE;
- }
+ t->validated = GL_FALSE;
break;
case GL_DEPTH_TEXTURE_MODE:
if (!texObj->Image[0][texObj->BaseLevel])
return;
- if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat
- == GL_DEPTH_COMPONENT) {
+ baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+ if (baseFormat == GL_DEPTH_COMPONENT ||
+ baseFormat == GL_DEPTH_STENCIL) {
r600SetDepthTexMode(texObj);
break;
} else {
@@ -368,10 +360,8 @@ static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
t->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = 0;
- }
+ radeon_miptree_unreference(&t->mt);
+
_mesa_delete_texture_object(ctx, texObj);
}
@@ -401,7 +391,7 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx,
r600SetTexDefaultState(t);
r600UpdateTexWrap(t);
r600SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
- r600SetTexBorderColor(t, t->base.BorderColor);
+ r600SetTexBorderColor(t, t->base.BorderColor.f);
return &t->base;
}
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 7d7e77d355..ae252c995b 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/context.h"
#include "main/macros.h"
-#include "main/texformat.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
@@ -78,7 +77,7 @@ void r600UpdateTextureState(GLcontext * ctx)
}
}
-static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format)
+static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format)
{
radeonTexObj *t = radeon_tex_obj(tObj);
@@ -87,9 +86,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+
switch (mesa_format) /* This is mesa format. */
{
case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_SIGNED_RGBA8888:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
@@ -101,8 +110,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) {
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
break;
case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
@@ -114,6 +134,16 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) {
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
break;
case MESA_FORMAT_ARGB8888:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
@@ -480,13 +510,21 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
case MESA_FORMAT_Z16:
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z32:
+ case MESA_FORMAT_S8:
switch (mesa_format) {
case MESA_FORMAT_Z16:
SETfield(t->SQ_TEX_RESOURCE1, FMT_16,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
break;
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_24,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
case MESA_FORMAT_Z24_S8:
SETfield(t->SQ_TEX_RESOURCE1, FMT_24_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
@@ -495,6 +533,12 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
SETfield(t->SQ_TEX_RESOURCE1, FMT_32,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
break;
+ case MESA_FORMAT_S8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+ break;
+ default:
+ break;
};
switch (tObj->DepthMode) {
case GL_LUMINANCE: /* X, X, X, ONE */
@@ -582,6 +626,31 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
return GL_TRUE;
}
+static GLuint r600_translate_shadow_func(GLenum func)
+{
+ switch (func) {
+ case GL_NEVER:
+ return SQ_TEX_DEPTH_COMPARE_NEVER;
+ case GL_LESS:
+ return SQ_TEX_DEPTH_COMPARE_LESS;
+ case GL_LEQUAL:
+ return SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
+ case GL_GREATER:
+ return SQ_TEX_DEPTH_COMPARE_GREATER;
+ case GL_GEQUAL:
+ return SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
+ case GL_NOTEQUAL:
+ return SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
+ case GL_EQUAL:
+ return SQ_TEX_DEPTH_COMPARE_EQUAL;
+ case GL_ALWAYS:
+ return SQ_TEX_DEPTH_COMPARE_ALWAYS;
+ default:
+ WARN_ONCE("Unknown shadow compare function! %d", func);
+ return 0;
+ }
+}
+
void r600SetDepthTexMode(struct gl_texture_object *tObj)
{
radeonTexObjPtr t;
@@ -591,7 +660,7 @@ void r600SetDepthTexMode(struct gl_texture_object *tObj)
t = radeon_tex_obj(tObj);
- r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat);
+ r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat);
}
@@ -605,7 +674,6 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
{
radeonTexObj *t = radeon_tex_obj(texObj);
const struct gl_texture_image *firstImage;
- int firstlevel = t->mt ? t->mt->firstLevel : 0;
GLuint uTexelPitch, row_align;
if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled &&
@@ -613,10 +681,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
t->bo)
return;
- firstImage = t->base.Image[0][firstlevel];
+ firstImage = t->base.Image[0][t->minLod];
if (!t->image_override) {
- if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) {
+ if (!r600GetTexFormat(texObj, firstImage->TexFormat)) {
radeon_error("unexpected texture format in %s\n",
__FUNCTION__);
return;
@@ -648,7 +716,8 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
}
row_align = rmesa->radeon.texture_row_align - 1;
- uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp;
+ uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align;
+ uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat);
uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK)
& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
@@ -662,11 +731,22 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex
SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1,
TEX_HEIGHT_shift, TEX_HEIGHT_mask);
- if ((t->mt->lastLevel - t->mt->firstLevel) > 0) {
- t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256;
- SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask);
- SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask);
+ t->SQ_TEX_RESOURCE2 = get_base_teximage_offset(t) / 256;
+
+ if ((t->maxLod - t->minLod) > 0) {
+ t->SQ_TEX_RESOURCE3 = radeon_miptree_image_offset(t->mt, 0, t->minLod + 1) / 256;
+ SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask);
+ SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask);
}
+ if(texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB)
+ {
+ SETfield(t->SQ_TEX_SAMPLER0, r600_translate_shadow_func(texObj->CompareFunc), DEPTH_COMPARE_FUNCTION_shift, DEPTH_COMPARE_FUNCTION_mask);
+ }
+ else
+ {
+ CLEARfield(t->SQ_TEX_SAMPLER0, DEPTH_COMPARE_FUNCTION_mask);
+ }
+
}
/**
@@ -764,7 +844,8 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
struct gl_texture_object *tObj =
_mesa_lookup_texture(rmesa->radeon.glCtx, texname);
radeonTexObjPtr t = radeon_tex_obj(tObj);
- uint32_t pitch_val, size;
+ const struct gl_texture_image *firstImage;
+ uint32_t pitch_val, size, row_align;
if (!tObj)
return;
@@ -774,7 +855,9 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
if (!offset)
return;
- size = pitch;//h * w * (depth / 8);
+ firstImage = t->base.Image[0][t->minLod];
+ row_align = rmesa->radeon.texture_row_align - 1;
+ size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height;
if (t->bo) {
radeon_bo_unref(t->bo);
t->bo = NULL;
@@ -870,18 +953,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
return;
}
- radeon_update_renderbuffers(pDRICtx, dPriv);
- /* back & depth buffer are useless free them right away */
- rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
- if (rb && rb->bo) {
- radeon_bo_unref(rb->bo);
- rb->bo = NULL;
- }
- rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
- if (rb && rb->bo) {
- radeon_bo_unref(rb->bo);
- rb->bo = NULL;
- }
+ radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
rb = rfb->color_rb[0];
if (rb->bo == NULL) {
/* Failed to BO for the buffer */
@@ -897,20 +969,14 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
- if (t->mt) {
- radeon_miptree_unreference(t->mt);
- t->mt = NULL;
- }
- if (rImage->mt) {
- radeon_miptree_unreference(rImage->mt);
- rImage->mt = NULL;
- }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
- texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
- internalFormat,
- type, format, 0);
+
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 903b6968be..0ff16b4ddd 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -32,12 +32,49 @@
#include "main/mtypes.h"
#include "main/imports.h"
+#include "shader/prog_parameter.h"
#include "radeon_debug.h"
#include "r600_context.h"
#include "r700_assembler.h"
+#define USE_CF_FOR_CONTINUE_BREAK 1
+#define USE_CF_FOR_POP_AFTER 1
+
+struct prog_instruction noise1_insts[12] = {
+ {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
+};
+float noise1_const[2][4] = {
+ {0.300000f, 0.900000f, 0.500000f, 0.300000f}
+};
+
+COMPILED_SUB noise1_presub = {
+ &(noise1_insts[0]),
+ 12,
+ 2,
+ 1,
+ 0,
+ &(noise1_const[0]),
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ {0,0,0},
+ 0
+};
+
BITS addrmode_PVSDST(PVSDST * pPVSDST)
{
return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
@@ -327,21 +364,27 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
return(format);
}
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
{
- if(pAsm->D.dst.op3)
+ if(nIsOp3 > 0)
{
return 3;
}
- switch (pAsm->D.dst.opcode)
+ switch (opcode)
{
- case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_KILLE:
+ case SQ_OP2_INST_KILLGT:
+ case SQ_OP2_INST_KILLGE:
+ case SQ_OP2_INST_KILLNE:
case SQ_OP2_INST_MUL:
case SQ_OP2_INST_MAX:
case SQ_OP2_INST_MIN:
//case SQ_OP2_INST_MAX_DX10:
//case SQ_OP2_INST_MIN_DX10:
+ case SQ_OP2_INST_SETE:
+ case SQ_OP2_INST_SETNE:
case SQ_OP2_INST_SETGT:
case SQ_OP2_INST_SETGE:
case SQ_OP2_INST_PRED_SETE:
@@ -354,9 +397,10 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
return 2;
case SQ_OP2_INST_MOV:
+ case SQ_OP2_INST_MOVA_FLOOR:
case SQ_OP2_INST_FRACT:
case SQ_OP2_INST_FLOOR:
- case SQ_OP2_INST_KILLGT:
+ case SQ_OP2_INST_TRUNC:
case SQ_OP2_INST_EXP_IEEE:
case SQ_OP2_INST_LOG_CLAMPED:
case SQ_OP2_INST_LOG_IEEE:
@@ -368,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
return 1;
default: radeon_error(
- "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
+ "Need instruction operand number for %x.\n", opcode);
};
return 3;
@@ -382,103 +426,128 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
pAsm->pR700Shader = pShader;
pAsm->currentShaderType = spt;
- pAsm->cf_last_export_ptr = NULL;
+ pAsm->cf_last_export_ptr = NULL;
- pAsm->cf_current_export_clause_ptr = NULL;
- pAsm->cf_current_alu_clause_ptr = NULL;
- pAsm->cf_current_tex_clause_ptr = NULL;
- pAsm->cf_current_vtx_clause_ptr = NULL;
- pAsm->cf_current_cf_clause_ptr = NULL;
+ pAsm->cf_current_export_clause_ptr = NULL;
+ pAsm->cf_current_alu_clause_ptr = NULL;
+ pAsm->cf_current_tex_clause_ptr = NULL;
+ pAsm->cf_current_vtx_clause_ptr = NULL;
+ pAsm->cf_current_cf_clause_ptr = NULL;
- // No clause has been created yet
- pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+ // No clause has been created yet
+ pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
- pAsm->number_of_colorandz_exports = 0;
- pAsm->number_of_exports = 0;
- pAsm->number_of_export_opcodes = 0;
+ pAsm->number_of_colorandz_exports = 0;
+ pAsm->number_of_exports = 0;
+ pAsm->number_of_export_opcodes = 0;
+ pAsm->alu_x_opcode = 0;
- pAsm->D.bits = 0;
- pAsm->S[0].bits = 0;
- pAsm->S[1].bits = 0;
- pAsm->S[2].bits = 0;
+ pAsm->D2.bits = 0;
- pAsm->uLastPosUpdate = 0;
+ pAsm->D.bits = 0;
+ pAsm->S[0].bits = 0;
+ pAsm->S[1].bits = 0;
+ pAsm->S[2].bits = 0;
+
+ pAsm->uLastPosUpdate = 0;
- *(BITS *) &pAsm->fp_stOutFmt0 = 0;
+ *(BITS *) &pAsm->fp_stOutFmt0 = 0;
- pAsm->uIIns = 0;
- pAsm->uOIns = 0;
- pAsm->number_used_registers = 0;
- pAsm->uUsedConsts = 256;
+ pAsm->uIIns = 0;
+ pAsm->uOIns = 0;
+ pAsm->number_used_registers = 0;
+ pAsm->uUsedConsts = 256;
- // Fragment programs
- pAsm->uBoolConsts = 0;
- pAsm->uIntConsts = 0;
- pAsm->uInsts = 0;
- pAsm->uConsts = 0;
+ // Fragment programs
+ pAsm->uBoolConsts = 0;
+ pAsm->uIntConsts = 0;
+ pAsm->uInsts = 0;
+ pAsm->uConsts = 0;
- pAsm->FCSP = 0;
- pAsm->fc_stack[0].type = FC_NONE;
+ pAsm->FCSP = 0;
+ pAsm->fc_stack[0].type = FC_NONE;
- pAsm->branch_depth = 0;
- pAsm->max_branch_depth = 0;
+ pAsm->aArgSubst[0] =
+ pAsm->aArgSubst[1] =
+ pAsm->aArgSubst[2] =
+ pAsm->aArgSubst[3] = (-1);
- pAsm->aArgSubst[0] =
- pAsm->aArgSubst[1] =
- pAsm->aArgSubst[2] =
- pAsm->aArgSubst[3] = (-1);
+ pAsm->uOutputs = 0;
- pAsm->uOutputs = 0;
+ for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
+ {
+ pAsm->color_export_register_number[i] = (-1);
+ }
- for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
- {
- pAsm->color_export_register_number[i] = (-1);
- }
+ pAsm->depth_export_register_number = (-1);
+ pAsm->stencil_export_register_number = (-1);
+ pAsm->coverage_to_mask_export_register_number = (-1);
+ pAsm->mask_export_register_number = (-1);
- pAsm->depth_export_register_number = (-1);
- pAsm->stencil_export_register_number = (-1);
- pAsm->coverage_to_mask_export_register_number = (-1);
- pAsm->mask_export_register_number = (-1);
+ pAsm->starting_export_register_number = 0;
+ pAsm->starting_vfetch_register_number = 0;
+ pAsm->starting_temp_register_number = 0;
+ pAsm->uFirstHelpReg = 0;
- pAsm->starting_export_register_number = 0;
- pAsm->starting_vfetch_register_number = 0;
- pAsm->starting_temp_register_number = 0;
- pAsm->uFirstHelpReg = 0;
+ pAsm->input_position_is_used = GL_FALSE;
+ pAsm->input_normal_is_used = GL_FALSE;
+ for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
+ {
+ pAsm->input_color_is_used[ i ] = GL_FALSE;
+ }
- pAsm->input_position_is_used = GL_FALSE;
- pAsm->input_normal_is_used = GL_FALSE;
+ for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
+ {
+ pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
+ }
+ for (i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
+ }
- for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
- {
- pAsm->input_color_is_used[ i ] = GL_FALSE;
- }
+ pAsm->number_of_inputs = 0;
- for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
- {
- pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
- }
+ pAsm->is_tex = GL_FALSE;
+ pAsm->need_tex_barrier = GL_FALSE;
- for (i=0; i<VERT_ATTRIB_MAX; i++)
- {
- pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
- }
+ pAsm->subs = NULL;
+ pAsm->unSubArraySize = 0;
+ pAsm->unSubArrayPointer = 0;
+ pAsm->callers = NULL;
+ pAsm->unCallerArraySize = 0;
+ pAsm->unCallerArrayPointer = 0;
+
+ pAsm->CALLSP = 0;
+ pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
+ pAsm->CALLSTACK[0].plstCFInstructions_local
+ = &(pAsm->pR700Shader->lstCFInstructions);
+
+ pAsm->CALLSTACK[0].max = 0;
+ pAsm->CALLSTACK[0].current = 0;
+
+ SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
- pAsm->number_of_inputs = 0;
+ pAsm->unCFflags = 0;
- pAsm->is_tex = GL_FALSE;
- pAsm->need_tex_barrier = GL_FALSE;
+ pAsm->presubs = NULL;
+ pAsm->unPresubArraySize = 0;
+ pAsm->unNumPresub = 0;
+ pAsm->unCurNumILInsts = 0;
- return 0;
+ pAsm->unVetTexBits = 0;
+
+ return 0;
}
GLboolean IsTex(gl_inst_opcode Opcode)
{
- if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+ if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
+ (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
{
return GL_TRUE;
}
@@ -591,6 +660,31 @@ int check_current_clause(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
+{
+ if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr =
+ (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+
+ if (pAsm->cf_current_cf_clause_ptr != NULL)
+ {
+ Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
+ AddCFInstruction( pAsm->pR700Shader,
+ (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
+ }
+ else
+ {
+ radeon_error("Could not allocate a new VFetch CF instruction.\n");
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
R700VertexInstruction* vertex_instruction_ptr)
{
@@ -797,6 +891,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
GLubyte element,
GLuint _signed,
GLboolean normalize,
+ GLenum format,
VTX_FETCH_METHOD * pFetchMethod)
{
GLuint client_size_inbyte;
@@ -845,10 +940,21 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ if(format == GL_BGRA)
+ {
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ }
+ else
+ {
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+ }
vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
@@ -986,7 +1092,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm)
checkop_init(pAsm);
- if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
@@ -997,7 +1104,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm)
{
bSrcConst[0] = GL_FALSE;
}
- if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
@@ -1030,7 +1138,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
checkop_init(pAsm);
- if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
@@ -1041,7 +1150,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
bSrcConst[0] = GL_FALSE;
}
- if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
@@ -1052,7 +1162,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
bSrcConst[1] = GL_FALSE;
}
- if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
@@ -1152,6 +1263,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
if (1 == pILInst->SrcReg[src].RelAddr)
{
setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
@@ -1162,10 +1274,18 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
}
pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
- pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ if(pILInst->SrcReg[src].Index < 0)
+ {
+ WARN_ONCE("Negative register offsets not supported yet!\n");
+ pAsm->S[fld].src.reg = 0;
+ }
+ else
+ {
+ pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ }
break;
case PROGRAM_INPUT:
- setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
pAsm->S[fld].src.rtype = SRC_REG_INPUT;
switch (pAsm->currentShaderType)
{
@@ -1178,7 +1298,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
}
break;
default:
- radeon_error("Invalid source argument type\n");
+ radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
return GL_FALSE;
}
}
@@ -1234,6 +1354,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm)
pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+ if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
+ {
+ pAsm->D2.dst2.SaturateMode = 1;
+ }
+ else
+ {
+ pAsm->D2.dst2.SaturateMode = 0;
+ }
+
return GL_TRUE;
}
@@ -1293,6 +1422,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
else
{
switch (pILInst->SrcReg[0].File) {
+ case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
@@ -1305,25 +1435,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
break;
case PROGRAM_INPUT:
- switch (pILInst->SrcReg[0].Index)
+ if(SPT_VP == pAsm->currentShaderType)
{
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
- case FRAG_ATTRIB_TEX0:
- case FRAG_ATTRIB_TEX1:
- case FRAG_ATTRIB_TEX2:
- case FRAG_ATTRIB_TEX3:
- case FRAG_ATTRIB_TEX4:
- case FRAG_ATTRIB_TEX5:
- case FRAG_ATTRIB_TEX6:
- case FRAG_ATTRIB_TEX7:
- bValidTexCoord = GL_TRUE;
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case VERT_ATTRIB_TEX0:
+ case VERT_ATTRIB_TEX1:
+ case VERT_ATTRIB_TEX2:
+ case VERT_ATTRIB_TEX3:
+ case VERT_ATTRIB_TEX4:
+ case VERT_ATTRIB_TEX5:
+ case VERT_ATTRIB_TEX6:
+ case VERT_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ }
+ }
+ else
+ {
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case FRAG_ATTRIB_WPOS:
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_FOGC:
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ case FRAG_ATTRIB_FACE:
+ fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+ break;
+ case FRAG_ATTRIB_PNTC:
+ fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+ break;
+ }
+
+ if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+ (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+ {
+ bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
pAsm->S[0].src.rtype = SRC_REG_INPUT;
- break;
+ }
}
- break;
+
+ break;
}
}
@@ -1368,8 +1538,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+ tex_instruction_ptr->m_Word0.f.alt_const = 0;
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ }
tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
if (normalized) {
@@ -1388,7 +1567,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
-
tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
// dst
@@ -1505,6 +1683,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
{
src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
}
+ else if (pSource->rtype == SRC_REC_LITERAL)
+ {
+ src_sel = SQ_ALU_SRC_LITERAL;
+ }
else
{
radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
@@ -1594,7 +1776,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
return GL_FALSE;
}
- if ( pAsm->cf_current_alu_clause_ptr == NULL ||
+ if ( pAsm->alu_x_opcode != 0 ||
+ pAsm->cf_current_alu_clause_ptr == NULL ||
( (pAsm->cf_current_alu_clause_ptr != NULL) &&
(pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
) )
@@ -1624,9 +1807,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
- //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
- pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+
+ if(pAsm->alu_x_opcode != 0)
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
+ pAsm->alu_x_opcode = 0;
+ }
+ else
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+ }
pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
@@ -1634,7 +1825,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
}
else
{
- pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
}
// If this clause constains any instruction that is forward dependent on a TEX instruction,
@@ -1911,7 +2102,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm,
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
@@ -2000,7 +2191,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
@@ -2033,7 +2224,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
if( is_gpr(sel) )
{
if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
- {
+ {
return GL_FALSE;
}
@@ -2045,7 +2236,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
else
{
if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
- {
+ {
return GL_FALSE;
}
}
@@ -2057,7 +2248,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
if( is_cfile(sel) )
{
if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
- {
+ {
return GL_FALSE;
}
}
@@ -2069,6 +2260,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
+ R700ALUInstruction * alu_instruction_ptr;
+ R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
+ R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
+
GLuint number_of_scalar_operations;
GLboolean is_single_scalar_operation;
GLuint scalar_channel_index;
@@ -2077,7 +2272,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
int current_source_index;
GLuint contiguous_slots_needed;
- GLuint uNumSrc = r700GetNumOperands(pAsm);
+ GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
//GLuint channel_swizzle, j;
//GLuint chan_counter[4] = {0, 0, 0, 0};
//PVSSRC * pSource[3];
@@ -2134,23 +2329,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
contiguous_slots_needed = 0;
- if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+ if(!is_single_scalar_operation)
{
contiguous_slots_needed = 4;
}
+ contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
+
initialize(pAsm);
for (scalar_channel_index=0;
scalar_channel_index < number_of_scalar_operations;
scalar_channel_index++)
{
- R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
- if (alu_instruction_ptr == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstruction(alu_instruction_ptr);
+ if(scalar_channel_index == (number_of_scalar_operations-1))
+ {
+ switch(pAsm->D2.dst2.literal_slots)
+ {
+ case 0:
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
+ break;
+ case 1:
+ alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
+ Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
+ alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
+ break;
+ case 2:
+ alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
+ Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
+ alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
+ break;
+ };
+ }
+ else
+ {
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
+ }
//src 0
current_source_index = 0;
@@ -2160,7 +2376,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
@@ -2174,13 +2390,13 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
}
//other bits
- alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+ alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
if( (is_single_scalar_operation == GL_TRUE)
|| (GL_TRUE == bSplitInst) )
@@ -2192,9 +2408,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
}
- alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
+ if(1 == pAsm->D.dst.predicated)
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ }
// dst
if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
@@ -2203,7 +2427,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
}
else
- {
+ {
radeon_error("Only temp destination registers supported for ALU dest regs.\n");
return GL_FALSE;
}
@@ -2233,7 +2457,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
- alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
+ alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
if (pAsm->D.dst.op3)
{
@@ -2260,8 +2484,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
+ alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
//alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
//alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
@@ -2289,8 +2513,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
+ alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
//alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
//alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
@@ -2317,7 +2541,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
}
if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
- {
+ {
return GL_FALSE;
}
@@ -2328,19 +2552,19 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
if (is_single_scalar_operation)
{
if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
- {
+ {
return GL_FALSE;
}
}
else
{
if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
- {
- return 1;
+ {
+ return GL_FALSE;
}
}
- contiguous_slots_needed = 0;
+ contiguous_slots_needed -= 1;
}
return GL_TRUE;
@@ -2391,11 +2615,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
//reset for next inst.
pAsm->D.bits = 0;
+ pAsm->D2.bits = 0;
pAsm->S[0].bits = 0;
pAsm->S[1].bits = 0;
pAsm->S[2].bits = 0;
pAsm->is_tex = GL_FALSE;
pAsm->need_tex_barrier = GL_FALSE;
+ pAsm->D2.bits = 0;
+ pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
return GL_TRUE;
}
@@ -2514,6 +2741,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
+{ /* TODO: ar values dont' persist between clauses */
+ if( GL_FALSE == checkop1(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_BAD(char *opcode_str)
{
radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
@@ -2599,9 +2855,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_COS(r700_AssemblerBase *pAsm)
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
- return assemble_math_function(pAsm, SQ_OP2_INST_COS);
+ int tmp;
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ assemble_src(pAsm, 0, -1);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
+ next_ins(pAsm);
+
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
+
+ assemble_dst(pAsm);
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ next_ins(pAsm);
+
+ //TODO - replicate if more channels set in WriteMask
+ return GL_TRUE;
+
}
GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
@@ -2635,7 +2926,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
}
else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
{
- onecomp_PVSSRC(&(pAsm->S[1].src), 3);
+ onecomp_PVSSRC(&(pAsm->S[0].src), 3);
}
if ( GL_FALSE == next_ins(pAsm) )
@@ -2688,6 +2979,133 @@ GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
{
return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
}
+
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ // FLOOR tmp.x, a.x
+ // EX2 dst.x tmp.x
+
+ if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // FRACT dst.y a.x
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // EX2 dst.z, a.x
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // MOV dst.w 1.0
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
{
@@ -2742,17 +3160,19 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
-{
- checkop1(pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
- pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
-
- if ( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ if(pILInst->Opcode == OPCODE_KIL)
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = opcode;
+ //pAsm->D.dst.math = 1;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
pAsm->D.dst.writex = 0;
pAsm->D.dst.writey = 0;
pAsm->D.dst.writez = 0;
@@ -2761,30 +3181,34 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = 0;
-
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
noneg_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
-
- if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
+ if(pILInst->Opcode == OPCODE_KIL_NV)
{
- pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = 0;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
+ neg_PVSSRC(&(pAsm->S[1].src));
}
else
- { //PROGRAM_OUTPUT
- pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
+ {
+ if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ {
+ return GL_FALSE;
+ }
+
}
-
- setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
- noswizzle_PVSSRC(&(pAsm->S[1].src));
-
+
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
+ /* Doc says KILL has to be last(end) ALU clause */
pAsm->pR700Shader->killIsUsed = GL_TRUE;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
return GL_TRUE;
}
@@ -2848,6 +3272,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
{
return GL_FALSE;
}
+
if( GL_FALSE == assemble_src(pAsm, 2, -1) )
{
return GL_FALSE;
@@ -2878,6 +3303,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
+{
+ BITS tmp1, tmp2, tmp3;
+
+ checkop1(pAsm);
+
+ tmp1 = gethelpr(pAsm);
+ tmp2 = gethelpr(pAsm);
+ tmp3 = gethelpr(pAsm);
+
+ // FIXME: The hardware can do fabs() directly on input
+ // elements, but the compiler doesn't have the
+ // capability to use that.
+
+ // MAX tmp1.x, a.x, -a.x (fabs(a.x))
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->S[1].bits = pAsm->S[0].bits;
+ flipneg_PVSSRC(&(pAsm->S[1].src));
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // Entire algo:
+ //
+ // LG2 tmp2.x, tmp1.x
+ // FLOOR tmp3.x, tmp2.x
+ // MOV dst.x, tmp3.x
+ // ADD tmp3.x, tmp2.x, -tmp3.x
+ // EX2 dst.y, tmp3.x
+ // MOV dst.z, tmp2.x
+ // MOV dst.w, 1.0
+
+ // LG2 tmp2.x, tmp1.x
+ // FLOOR tmp3.x, tmp2.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp2;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp3;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.x, tmp3.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // ADD tmp3.x, tmp2.x, -tmp3.x
+ // EX2 dst.y, tmp3.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp3;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[1].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ neg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.z, tmp2.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.w 1.0
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
{
int tmp, ii;
@@ -3035,6 +3671,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = srcType;
pAsm->S[0].src.reg = srcReg;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[1].src.reg = tmp;
setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
@@ -3371,77 +4008,137 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
}
-GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
-{
- return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
-}
-
GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
{
BITS tmp;
- checkop1(pAsm);
+ checkop1(pAsm);
- tmp = gethelpr(pAsm);
+ tmp = gethelpr(pAsm);
+ /* tmp.x = src /2*PI */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
- // COS tmp.x, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_COS;
- pAsm->D.dst.math = 1;
+ assemble_src(pAsm, 0, -1);
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ next_ins(pAsm);
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ // COS dst.x, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ pAsm->D.dst.math = 1;
- // SIN tmp.y, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
- pAsm->D.dst.math = 1;
+ assemble_dst(pAsm);
+ /* mask y */
+ pAsm->D.dst.writey = 0;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writey = 1;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
- if( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ // SIN dst.y, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ pAsm->D.dst.math = 1;
- // MOV dst.mask, tmp
- pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ assemble_dst(pAsm);
+ /* mask x */
+ pAsm->D.dst.writex = 0;
- if( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp;
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[0].src.swizzlez = SQ_SEL_0;
- pAsm->S[0].src.swizzlew = SQ_SEL_0;
+ return GL_TRUE;
+}
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = opcode;
+ //pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.predicated = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->uHelpReg;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
+ pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = pAsm->uHelpReg;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_0;
+ pAsm->S[1].src.swizzley = SQ_SEL_0;
+ pAsm->S[1].src.swizzlez = SQ_SEL_0;
+ pAsm->S[1].src.swizzlew = SQ_SEL_0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
return GL_TRUE;
}
@@ -3524,6 +4221,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
{
+ case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
@@ -3544,22 +4242,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
need_barrier = GL_TRUE;
}
- switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
- {
- case OPCODE_TEX:
- break;
- case OPCODE_TXB:
- radeon_error("do not support TXB yet\n");
- return GL_FALSE;
- break;
- case OPCODE_TXP:
- break;
- default:
- radeon_error("Internal error: bad texture op (not TEX)\n");
- return GL_FALSE;
- break;
- }
-
if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
{
GLuint tmp = gethelpr(pAsm);
@@ -3637,24 +4319,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
- * have to do explicit instruction
- */
- pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp1;
- pAsm->D.dst.writez = 1;
-
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp1;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[1].bits = pAsm->S[0].bits;
- flipneg_PVSSRC(&(pAsm->S[1].src));
-
- next_ins(pAsm);
-
/* tmp1.z = RCP_e(|tmp1.z|) */
pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
pAsm->D.dst.math = 1;
@@ -3667,13 +4331,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp1;
pAsm->S[0].src.swizzlex = SQ_SEL_Z;
+ pAsm->S[0].src.abs = 1;
next_ins(pAsm);
/* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
* MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
* muladd has no writemask, have to use another temp
- * also no support for imm constants, so add 1 here
*/
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
pAsm->D.dst.op3 = 1;
@@ -3690,30 +4354,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[1].src.reg = tmp1;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
- pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+ /* immediate c 1.5 */
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1.5F;
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
pAsm->S[2].src.reg = tmp1;
- setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
-
- next_ins(pAsm);
-
- /* ADD the remaining .5 */
- pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp2;
- pAsm->D.dst.writex = 1;
- pAsm->D.dst.writey = 1;
- pAsm->D.dst.writez = 0;
- pAsm->D.dst.writew = 0;
-
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp2;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
- pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
- noswizzle_PVSSRC(&(pAsm->S[1].src));
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
next_ins(pAsm);
@@ -3738,14 +4384,35 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ {
+ case OPCODE_DDX:
+ /* will these need WQM(1) on CF inst ? */
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+ break;
+ case OPCODE_DDY:
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+ break;
+ case OPCODE_TXB:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+ break;
+ default:
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
+ else
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ }
+
+ pAsm->is_tex = GL_TRUE;
+ if ( GL_TRUE == need_barrier )
+
pAsm->is_tex = GL_TRUE;
if ( GL_TRUE == need_barrier )
{
pAsm->need_tex_barrier = GL_TRUE;
}
// Set src1 to tex unit id
- pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
+ pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
//No sw info from mesa compiler, so hard code here.
@@ -3779,11 +4446,46 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[0].src.swizzlew = SQ_SEL_Y;
}
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ {
+ /* compare value goes to w chan ? */
+ pAsm->S[0].src.swizzlew = SQ_SEL_Z;
+ }
+
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
+ /* add ARB shadow ambient but clamp to 0..1 */
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ {
+ /* ADD_SAT dst, dst, ambient[texunit] */
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ pAsm->D2.dst2.SaturateMode = 1;
+
+ pAsm->S[0].src.rtype = pAsm->D.dst.rtype;
+ pAsm->S[0].src.reg = pAsm->D.dst.reg;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
+ pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
+ noswizzle_PVSSRC(&(pAsm->S[1].src));
+ noneg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ }
+
return GL_TRUE;
}
@@ -3902,27 +4604,909 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_IF(r700_AssemblerBase *pAsm)
+static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
{
+ switch (uReason)
+ {
+ case FC_PUSH_VPM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current--;
+ break;
+ case FC_PUSH_WQM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
+ break;
+ case FC_LOOP:
+ pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
+ break;
+ case FC_REP:
+ /* TODO : for 16 vp asic, should -= 2; */
+ pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
+ break;
+ };
+}
+
+static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
+{
+ if(GL_TRUE == bCheckMaxOnly)
+ {
+ switch (uReason)
+ {
+ case FC_PUSH_VPM:
+ if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
+ > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max =
+ pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
+ }
+ break;
+ case FC_PUSH_WQM:
+ if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
+ > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max =
+ pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
+ }
+ break;
+ }
+ return;
+ }
+
+ switch (uReason)
+ {
+ case FC_PUSH_VPM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current++;
+ break;
+ case FC_PUSH_WQM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
+ break;
+ case FC_LOOP:
+ pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
+ break;
+ case FC_REP:
+ /* TODO : for 16 vp asic, should += 2; */
+ pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
+ break;
+ };
+
+ if(pAsm->CALLSTACK[pAsm->CALLSP].current
+ > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max =
+ pAsm->CALLSTACK[pAsm->CALLSP].current;
+ }
+}
+
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
+
+ return GL_TRUE;
+}
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
+{
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if(GL_TRUE != bHasElse)
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ }
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->FCSP++;
+ pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
+ pAsm->fc_stack[pAsm->FCSP].mid = NULL;
+ pAsm->fc_stack[pAsm->FCSP].midLen= 0;
+ pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
+
+#ifndef USE_CF_FOR_POP_AFTER
+ if(GL_TRUE != bHasElse)
+ {
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+ }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
+ 0,
+ sizeof(R700ControlFlowGenericClause *) );
+ pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
+ //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
+
+#ifndef USE_CF_FOR_POP_AFTER
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
+
return GL_TRUE;
}
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
{
+#ifdef USE_CF_FOR_POP_AFTER
+ pops(pAsm, 1);
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ /* no else in between */
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+ }
+ else
+ {
+ pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+ }
+
+ if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+ }
+
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
+ {
+ radeon_error("if/endif in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ pAsm->FCSP--;
+
+ decreaseCurrent(pAsm, FC_PUSH_VPM);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->FCSP++;
+ pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
+ pAsm->fc_stack[pAsm->FCSP].mid = NULL;
+ pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
+ pAsm->fc_stack[pAsm->FCSP].midLen = 0;
+ pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
+
+ checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+ unsigned int unFCSP;
+ for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ break;
+ }
+ }
+ if(0 == FC_LOOP)
+ {
+ radeon_error("Break is not inside loop/endloop pair.\n");
+ return GL_FALSE;
+ }
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+ (void *)pAsm->fc_stack[unFCSP].mid,
+ sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+ sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+ pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+ pAsm->fc_stack[unFCSP].unNumMid++;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif //USE_CF_FOR_CONTINUE_BREAK
+ return GL_TRUE;
+}
+
+GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+ unsigned int unFCSP;
+ for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ break;
+ }
+ }
+ if(0 == FC_LOOP)
+ {
+ radeon_error("Continue is not inside loop/endloop pair.\n");
+ return GL_FALSE;
+ }
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+ (void *)pAsm->fc_stack[unFCSP].mid,
+ sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+ sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+ pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+ pAsm->fc_stack[unFCSP].unNumMid++;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif /* USE_CF_FOR_CONTINUE_BREAK */
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
+{
+ GLuint i;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
+ {
+ pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
+ }
+ if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+ }
+#endif
+
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
+ {
+ radeon_error("loop/endloop in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ GLuint unFCSP;
+ GLuint unIF = 0;
+ if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
+ {
+ for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ breakLoopOnFlag(pAsm, unFCSP);
+ break;
+ }
+ else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+ {
+ unIF++;
+ }
+ }
+ if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
+ {
+#ifdef USE_CF_FOR_POP_AFTER
+ returnOnFlag(pAsm, unIF);
+#else
+ returnOnFlag(pAsm, 0);
+#endif /* USE_CF_FOR_POP_AFTER */
+ pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
+ }
+ }
+
+ pAsm->FCSP--;
+
+ decreaseCurrent(pAsm, FC_LOOP);
+
+ return GL_TRUE;
+}
+
+void add_return_inst(r700_AssemblerBase *pAsm)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+}
+
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
+{
+ /* Put in sub */
+ if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
+ {
+ pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
+ sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
+ sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
+ if(NULL == pAsm->subs)
+ {
+ return GL_FALSE;
+ }
+ pAsm->unSubArraySize += 10;
+ }
+
+ pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
+ pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
+ pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
+ pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
+
+ pAsm->CALLSP++;
+ pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
+ pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
+ pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
+ = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
+ pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
+ pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
+ SetActiveCFlist(pAsm->pR700Shader,
+ pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+ pAsm->unSubArrayPointer++;
+
+ /* start sub */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ pAsm->FCSP++;
+ pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
+
+ checkStackDepth(pAsm, FC_REP, GL_FALSE);
+
return GL_TRUE;
}
-GLboolean AssembleInstr(GLuint uiNumberInsts,
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
+{
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
+ {
+ radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ /* copy max to sub structure */
+ pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
+ = pAsm->CALLSTACK[pAsm->CALLSP].max;
+
+ decreaseCurrent(pAsm, FC_REP);
+
+ pAsm->CALLSP--;
+ SetActiveCFlist(pAsm->pR700Shader,
+ pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ pAsm->FCSP--;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_RET(r700_AssemblerBase *pAsm)
+{
+ GLuint unIF = 0;
+
+ if(pAsm->CALLSP > 0)
+ { /* in sub */
+ GLuint unFCSP;
+ for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ setRetInLoopFlag(pAsm, SQ_SEL_1);
+ breakLoopOnFlag(pAsm, unFCSP);
+ pAsm->unCFflags |= LOOPRET_FLAGS;
+
+ return GL_TRUE;
+ }
+ else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+ {
+ unIF++;
+ }
+ }
+ }
+
+#ifdef USE_CF_FOR_POP_AFTER
+ if(unIF > 0)
+ {
+ pops(pAsm, unIF);
+ }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ add_return_inst(pAsm);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
+ GLint nILindex,
+ GLuint uiIL_Shift,
+ GLuint uiNumberInsts,
+ struct prog_instruction *pILInst,
+ PRESUB_DESC * pPresubDesc)
+{
+ GLint uiIL_Offset;
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ /* Put in caller */
+ if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
+ {
+ pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
+ sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
+ sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
+ if(NULL == pAsm->callers)
+ {
+ return GL_FALSE;
+ }
+ pAsm->unCallerArraySize += 10;
+ }
+
+ uiIL_Offset = nILindex + uiIL_Shift;
+ pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
+ pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
+
+ pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
+ pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
+
+ pAsm->unCallerArrayPointer++;
+
+ int j;
+ GLuint max;
+ GLuint unSubID;
+ GLboolean bRet;
+ for(j=0; j<pAsm->unSubArrayPointer; j++)
+ {
+ if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
+ { /* compiled before */
+
+ max = pAsm->subs[j].unStackDepthMax
+ + pAsm->CALLSTACK[pAsm->CALLSP].current;
+ if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+ }
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
+ return GL_TRUE;
+ }
+ }
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
+ unSubID = pAsm->unSubArrayPointer;
+
+ bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
+
+ if(GL_TRUE == bRet)
+ {
+ max = pAsm->subs[unSubID].unStackDepthMax
+ + pAsm->CALLSTACK[pAsm->CALLSP].current;
+ if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+ }
+
+ pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
+ }
+
+ return bRet;
+}
+
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
+{
+ GLfloat fLiteral[2] = {0.1, 0.0};
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ pAsm->D.dst.op3 = 0;
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->flag_reg_index;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
+ pAsm->D.dst.predicated = 0;
+ /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
+ pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
+ pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
+#if 0
+ pAsm->S[0].src.rtype = SRC_REC_LITERAL;
+ //pAsm->S[0].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = SQ_SEL_X;
+ pAsm->S[0].src.swizzley = SQ_SEL_Y;
+ pAsm->S[0].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[0].src.swizzlew = SQ_SEL_W;
+
+ if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
+ {
+ return GL_FALSE;
+ }
+#else
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = flagValue;
+ pAsm->S[0].src.swizzley = flagValue;
+ pAsm->S[0].src.swizzlez = flagValue;
+ pAsm->S[0].src.swizzlew = flagValue;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+#endif
+
+ return GL_TRUE;
+}
+
+GLboolean testFlag(r700_AssemblerBase *pAsm)
+{
+ GLfloat fLiteral[2] = {0.1, 0.0};
+
+ //Test flag
+ GLuint tmp = gethelpr(pAsm);
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
+ pAsm->D.dst.predicated = 1;
+ pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
+
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->flag_reg_index;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = SQ_SEL_X;
+ pAsm->S[0].src.swizzley = SQ_SEL_Y;
+ pAsm->S[0].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[0].src.swizzlew = SQ_SEL_W;
+#if 0
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ //pAsm->S[1].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_X;
+ pAsm->S[1].src.swizzley = SQ_SEL_Y;
+ pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[1].src.swizzlew = SQ_SEL_W;
+
+ if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
+ {
+ return GL_FALSE;
+ }
+#else
+ pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[1].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_1;
+ pAsm->S[1].src.swizzley = SQ_SEL_1;
+ pAsm->S[1].src.swizzlez = SQ_SEL_1;
+ pAsm->S[1].src.swizzlew = SQ_SEL_1;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+#endif
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+ return GL_TRUE;
+}
+
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
+{
+ testFlag(pAsm);
+ jumpToOffest(pAsm, 1, 4);
+ setRetInLoopFlag(pAsm, SQ_SEL_0);
+ pops(pAsm, unIF + 1);
+ add_return_inst(pAsm);
+
+ return GL_TRUE;
+}
+
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
+{
+ testFlag(pAsm);
+
+ //break
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+ (void *)pAsm->fc_stack[unFCSP].mid,
+ sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+ sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+ pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+ pAsm->fc_stack[unFCSP].unNumMid++;
+
+ pops(pAsm, 1);
+
+ return GL_TRUE;
+}
+
+GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiIL_Shift,
+ GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode)
{
GLuint i;
pR700AsmCode->pILInst = pILInst;
- for(i=0; i<uiNumberInsts; i++)
+ for(i=uiFirstInst; i<uiNumberInsts; i++)
{
pR700AsmCode->uiCurInst = i;
+#ifndef USE_CF_FOR_CONTINUE_BREAK
+ if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+ switch(pILInst[i].Opcode)
+ {
+ case OPCODE_SLE:
+ pILInst[i].Opcode = OPCODE_SGT;
+ break;
+ case OPCODE_SLT:
+ pILInst[i].Opcode = OPCODE_SGE;
+ break;
+ case OPCODE_SGE:
+ pILInst[i].Opcode = OPCODE_SLT;
+ break;
+ case OPCODE_SGT:
+ pILInst[i].Opcode = OPCODE_SLE;
+ break;
+ case OPCODE_SEQ:
+ pILInst[i].Opcode = OPCODE_SNE;
+ break;
+ case OPCODE_SNE:
+ pILInst[i].Opcode = OPCODE_SEQ;
+ break;
+ default:
+ break;
+ }
+ }
+#endif
+ if(pILInst[i].CondUpdate == 1)
+ {
+ /* remember dest register used for cond evaluation */
+ /* XXX also handle PROGRAM_OUTPUT registers here? */
+ pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
+ }
+
switch (pILInst[i].Opcode)
{
case OPCODE_ABS:
@@ -3936,8 +5520,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
break;
case OPCODE_ARL:
- radeon_error("Not yet implemented instruction OPCODE_ARL \n");
- //if ( GL_FALSE == assemble_BAD("ARL") )
+ if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
return GL_FALSE;
break;
case OPCODE_ARR:
@@ -3951,7 +5534,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_COS:
- if ( GL_FALSE == assemble_COS(pR700AsmCode) )
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
return GL_FALSE;
break;
@@ -3972,16 +5555,16 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_EXP:
- radeon_error("Not yet implemented instruction OPCODE_EXP \n");
- //if ( GL_FALSE == assemble_BAD("EXP") )
+ if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
return GL_FALSE;
- break; // approx of EX2
+ break;
case OPCODE_FLR:
if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
return GL_FALSE;
break;
- //case OP_FLR_INT:
+ //case OP_FLR_INT: ;
+
// if ( GL_FALSE == assemble_FLR_INT() )
// return GL_FALSE;
// break;
@@ -3992,7 +5575,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
break;
case OPCODE_KIL:
- if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
+ case OPCODE_KIL_NV:
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
return GL_FALSE;
break;
case OPCODE_LG2:
@@ -4008,10 +5592,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_LOG:
- radeon_error("Not yet implemented instruction OPCODE_LOG \n");
- //if ( GL_FALSE == assemble_BAD("LOG") )
+ if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
return GL_FALSE;
- break; // approx of LG2
+ break;
case OPCODE_MAD:
if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
@@ -4033,6 +5616,26 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
case OPCODE_MUL:
if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
return GL_FALSE;
+ break;
+
+ case OPCODE_NOISE1:
+ {
+ callPreSub(pR700AsmCode,
+ GLSL_NOISE1,
+ &noise1_presub,
+ pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
+ 1);
+ radeon_error("noise1: not yet supported shader instruction\n");
+ };
+ break;
+ case OPCODE_NOISE2:
+ radeon_error("noise2: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE3:
+ radeon_error("noise3: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE4:
+ radeon_error("noise4: not yet supported shader instruction\n");
break;
case OPCODE_POW:
@@ -4048,22 +5651,78 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_SIN:
- if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
return GL_FALSE;
break;
case OPCODE_SCS:
if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
return GL_FALSE;
- break;
+ break;
+
+ case OPCODE_SEQ:
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_SGT:
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
+ {
+ return GL_FALSE;
+ }
+ break;
case OPCODE_SGE:
if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
+ {
return GL_FALSE;
- break;
+ }
+ break;
+
+ /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
case OPCODE_SLT:
- if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
+ {
+ struct prog_src_register SrcRegSave[2];
+ SrcRegSave[0] = pILInst[i].SrcReg[0];
+ SrcRegSave[1] = pILInst[i].SrcReg[1];
+ pILInst[i].SrcReg[0] = SrcRegSave[1];
+ pILInst[i].SrcReg[1] = SrcRegSave[0];
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ }
+ break;
+
+ case OPCODE_SLE:
+ {
+ struct prog_src_register SrcRegSave[2];
+ SrcRegSave[0] = pILInst[i].SrcReg[0];
+ SrcRegSave[1] = pILInst[i].SrcReg[1];
+ pILInst[i].SrcReg[0] = SrcRegSave[1];
+ pILInst[i].SrcReg[1] = SrcRegSave[0];
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ }
+ break;
+
+ case OPCODE_SNE:
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
+ {
return GL_FALSE;
- break;
+ }
+ break;
//case OP_STP:
// if ( GL_FALSE == assemble_STP(pR700AsmCode) )
@@ -4089,7 +5748,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
}
}
break;
-
+ case OPCODE_DDX:
+ case OPCODE_DDY:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
@@ -4097,30 +5757,104 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
+ case OPCODE_TRUNC:
+ if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
+ return GL_FALSE;
+ break;
+
case OPCODE_XPD:
if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
return GL_FALSE;
break;
- case OPCODE_IF :
- if ( GL_FALSE == assemble_IF(pR700AsmCode) )
- return GL_FALSE;
+ case OPCODE_IF:
+ {
+ GLboolean bHasElse = GL_FALSE;
+
+ if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
+ {
+ bHasElse = GL_TRUE;
+ }
+
+ if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
+ {
+ return GL_FALSE;
+ }
+ }
break;
+
case OPCODE_ELSE :
- radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
- //if ( GL_FALSE == assemble_BAD("ELSE") )
+ if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
return GL_FALSE;
break;
+
case OPCODE_ENDIF:
if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
return GL_FALSE;
break;
+ case OPCODE_BGNLOOP:
+ if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_BRK:
+ if( GL_FALSE == assemble_BRK(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_CONT:
+ if( GL_FALSE == assemble_CONT(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_ENDLOOP:
+ if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_BGNSUB:
+ if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_RET:
+ if( GL_FALSE == assemble_RET(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_CAL:
+ if( GL_FALSE == assemble_CAL(pR700AsmCode,
+ pILInst[i].BranchTarget,
+ uiIL_Shift,
+ uiNumberInsts,
+ pILInst,
+ NULL) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
//case OPCODE_EXPORT:
// if ( GL_FALSE == assemble_EXPORT() )
// return GL_FALSE;
// break;
+ case OPCODE_ENDSUB:
+ return assemble_ENDSUB(pR700AsmCode);
+
case OPCODE_END:
//pR700AsmCode->uiCurInst = i;
//This is to remaind that if in later exoort there is depth/stencil
@@ -4137,6 +5871,417 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_TRUE;
}
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
+{
+ setRetInLoopFlag(pAsm, SQ_SEL_0);
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ return GL_TRUE;
+}
+
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
+{
+ GLuint i;
+ GLuint unCFoffset;
+ TypedShaderList * plstCFmain;
+ TypedShaderList * plstCFsub;
+
+ R700ShaderInstruction * pInst;
+ R700ControlFlowGenericClause * pCFInst;
+
+ R700ControlFlowALUClause * pCF_ALU;
+ R700ALUInstruction * pALU;
+ GLuint unConstOffset = 0;
+ GLuint unRegOffset;
+ GLuint unMinRegIndex;
+
+ plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
+
+ /* remove flags init if they are not used */
+ if((pAsm->unCFflags & HAS_LOOPRET) == 0)
+ {
+ R700ControlFlowALUClause * pCF_ALU;
+ pInst = plstCFmain->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+ if(0 == pCF_ALU->m_Word1.f.count)
+ {
+ pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
+ }
+ else
+ {
+ R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
+
+ pALU->m_pLinkedALUClause = NULL;
+ pALU = (R700ALUInstruction *)(pALU->pNextInst);
+ pALU->m_pLinkedALUClause = pCF_ALU;
+ pCF_ALU->m_pLinkedALUInstruction = pALU;
+
+ pCF_ALU->m_Word1.f.count--;
+ }
+ break;
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
+ if(pAsm->CALLSTACK[0].max > 0)
+ {
+ pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
+ }
+
+ if(0 == pAsm->unSubArrayPointer)
+ {
+ return GL_TRUE;
+ }
+
+ unCFoffset = plstCFmain->uNumOfNode;
+
+ if(NULL != pILProg->Parameters)
+ {
+ unConstOffset = pILProg->Parameters->NumParameters;
+ }
+
+ /* Reloc subs */
+ for(i=0; i<pAsm->unSubArrayPointer; i++)
+ {
+ pAsm->subs[i].unCFoffset = unCFoffset;
+ plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
+
+ pInst = plstCFsub->pHead;
+
+ /* reloc instructions */
+ while(pInst)
+ {
+ if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
+ {
+ pCFInst = (R700ControlFlowGenericClause *)pInst;
+
+ switch (pCFInst->m_Word1.f.cf_inst)
+ {
+ case SQ_CF_INST_POP:
+ case SQ_CF_INST_JUMP:
+ case SQ_CF_INST_ELSE:
+ case SQ_CF_INST_LOOP_END:
+ case SQ_CF_INST_LOOP_START:
+ case SQ_CF_INST_LOOP_START_NO_AL:
+ case SQ_CF_INST_LOOP_CONTINUE:
+ case SQ_CF_INST_LOOP_BREAK:
+ pCFInst->m_Word0.f.addr += unCFoffset;
+ break;
+ default:
+ break;
+ }
+ }
+
+ pInst->m_uIndex += unCFoffset;
+
+ pInst = pInst->pNextInst;
+ };
+
+ if(NULL != pAsm->subs[i].pPresubDesc)
+ {
+ GLuint uNumSrc;
+
+ unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
+ unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
+
+ pInst = plstCFsub->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+
+ if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src0_sel += unConstOffset;
+ }
+
+ if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
+ >= SQ_OP3_INST_MUL_LIT )
+ { /* op3 : 3 srcs */
+ if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
+ }
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ else
+ {
+ if(pAsm->bR6xx)
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+ }
+ else
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+ }
+ if(2 == uNumSrc)
+ { /* 2 srcs */
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ }
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
+ /* Put sub into main */
+ plstCFmain->pTail->pNextInst = plstCFsub->pHead;
+ plstCFmain->pTail = plstCFsub->pTail;
+ plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
+
+ unCFoffset += plstCFsub->uNumOfNode;
+ }
+
+ /* reloc callers */
+ for(i=0; i<pAsm->unCallerArrayPointer; i++)
+ {
+ pAsm->callers[i].cf_ptr->m_Word0.f.addr
+ = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
+
+ if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
+ {
+ unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
+
+ if(NULL != pAsm->callers[i].prelude_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ if(NULL != pAsm->callers[i].finale_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean callPreSub(r700_AssemblerBase* pAsm,
+ LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+ COMPILED_SUB * pCompiledSub,
+ GLshort uOutReg,
+ GLshort uNumValidSrc)
+{
+ /* save assemble context */
+ GLuint starting_temp_register_number_save;
+ GLuint number_used_registers_save;
+ GLuint uFirstHelpReg_save;
+ GLuint uHelpReg_save;
+ GLuint uiCurInst_save;
+ struct prog_instruction *pILInst_save;
+ PRESUB_DESC * pPresubDesc;
+ GLboolean bRet;
+ int i;
+
+ R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
+
+ /* copy srcs to presub inputs */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ for(i=0; i<uNumValidSrc; i++)
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, i, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ next_ins(pAsm);
+ }
+ if(uNumValidSrc > 0)
+ {
+ prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ /* browse thro existing presubs. */
+ for(i=0; i<pAsm->unNumPresub; i++)
+ {
+ if(pAsm->presubs[i].sptSigniture == scriptSigniture)
+ {
+ break;
+ }
+ }
+
+ if(i == pAsm->unNumPresub)
+ { /* not loaded yet */
+ /* save assemble context */
+ number_used_registers_save = pAsm->number_used_registers;
+ uFirstHelpReg_save = pAsm->uFirstHelpReg;
+ uHelpReg_save = pAsm->uHelpReg;
+ starting_temp_register_number_save = pAsm->starting_temp_register_number;
+ pILInst_save = pAsm->pILInst;
+ uiCurInst_save = pAsm->uiCurInst;
+
+ /* alloc in presub */
+ if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
+ {
+ pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
+ sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
+ sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
+ if(NULL == pAsm->presubs)
+ {
+ radeon_error("No memeory to allocate built in shader function description structures. \n");
+ return GL_FALSE;
+ }
+ pAsm->unPresubArraySize += 4;
+ }
+
+ pPresubDesc = &(pAsm->presubs[i]);
+ pPresubDesc->sptSigniture = scriptSigniture;
+
+ /* constants offsets need to be final resolved at reloc. */
+ if(0 == pAsm->unNumPresub)
+ {
+ pPresubDesc->unConstantsStart = 0;
+ }
+ else
+ {
+ pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
+ + pAsm->presubs[i-1].pCompiledSub->NumParameters;
+ }
+
+ pPresubDesc->pCompiledSub = pCompiledSub;
+
+ pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
+ pPresubDesc->maxStartReg = uFirstHelpReg_save;
+ pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
+
+ pAsm->unNumPresub++;
+
+ /* setup new assemble context */
+ pAsm->starting_temp_register_number = 0;
+ pAsm->number_used_registers = pCompiledSub->NumTemporaries;
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+ pAsm->uHelpReg = pAsm->uFirstHelpReg;
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+
+
+ pPresubDesc->number_used_registers = pAsm->number_used_registers;
+
+ /* restore assemble context */
+ pAsm->number_used_registers = number_used_registers_save;
+ pAsm->uFirstHelpReg = uFirstHelpReg_save;
+ pAsm->uHelpReg = uHelpReg_save;
+ pAsm->starting_temp_register_number = starting_temp_register_number_save;
+ pAsm->pILInst = pILInst_save;
+ pAsm->uiCurInst = uiCurInst_save;
+ }
+ else
+ { /* was loaded */
+ pPresubDesc = &(pAsm->presubs[i]);
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+ }
+
+ if(GL_FALSE == bRet)
+ {
+ radeon_error("Shader presub assemble failed. \n");
+ }
+ else
+ {
+ /* copy presub output to real dst */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
+ pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
+ pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
+ pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
+ pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
+
+ next_ins(pAsm);
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
+ {
+ pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
+ }
+ if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
+ {
+ pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
+ }
+
+ return bRet;
+}
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
@@ -4282,6 +6427,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
GLbitfield OutputsWritten)
{
unsigned int unBit;
+ GLuint export_count = 0;
if(pR700AsmCode->depth_export_register_number >= 0)
{
@@ -4303,6 +6449,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
{
return GL_FALSE;
}
+ export_count++;
}
unBit = 1 << FRAG_RESULT_DEPTH;
if(OutputsWritten & unBit)
@@ -4316,8 +6463,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
{
return GL_FALSE;
}
+ export_count++;
}
-
+ /* Need to export something, otherwise we'll hang
+ * results are undefined anyway */
+ if(export_count == 0)
+ {
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ }
+
if(pR700AsmCode->cf_last_export_ptr != NULL)
{
pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
@@ -4422,6 +6576,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
export_starting_index++;
}
}
+
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[i],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+ }
// At least one param should be exported
if (export_count)
@@ -4456,6 +6629,21 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
{
FREE(pR700AsmCode->pucOutMask);
FREE(pR700AsmCode->pInstDeps);
+
+ if(NULL != pR700AsmCode->subs)
+ {
+ FREE(pR700AsmCode->subs);
+ }
+ if(NULL != pR700AsmCode->callers)
+ {
+ FREE(pR700AsmCode->callers);
+ }
+
+ if(NULL != pR700AsmCode->presubs)
+ {
+ FREE(pR700AsmCode->presubs);
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 0d4283e4ba..56baf5b0d9 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -34,6 +34,45 @@
#include "r700_shaderinst.h"
#include "r700_shader.h"
+typedef enum LOADABLE_SCRIPT_SIGNITURE
+{
+ GLSL_NOISE1 = 0x10000001,
+ GLSL_NOISE2 = 0x10000002,
+ GLSL_NOISE3 = 0x10000003,
+ GLSL_NOISE4 = 0x10000004
+}LOADABLE_SCRIPT_SIGNITURE;
+
+typedef struct COMPILED_SUB
+{
+ struct prog_instruction *Instructions;
+ GLuint NumInstructions;
+ GLuint NumTemporaries;
+ GLuint NumParameters;
+ GLuint MinRegIndex;
+ GLfloat (*ParameterValues)[4];
+ GLbyte outputSwizzleX;
+ GLbyte outputSwizzleY;
+ GLbyte outputSwizzleZ;
+ GLbyte outputSwizzleW;
+ GLshort srcRegIndex[3];
+ GLushort dstRegIndex;
+}COMPILED_SUB;
+
+typedef struct PRESUB_DESCtag
+{
+ LOADABLE_SCRIPT_SIGNITURE sptSigniture;
+ GLint subIL_Shift;
+ struct prog_src_register InReg[3];
+ struct prog_dst_register OutReg;
+
+ GLushort maxStartReg;
+ GLushort number_used_registers;
+
+ GLuint unConstantsStart;
+
+ COMPILED_SUB * pCompiledSub;
+} PRESUB_DESC;
+
typedef enum SHADER_PIPE_TYPE
{
SPT_VP = 0,
@@ -72,7 +111,8 @@ typedef enum SrcRegisterType
SRC_REG_INPUT = 1,
SRC_REG_CONSTANT = 2,
SRC_REG_ALT_TEMPORARY = 3,
- NUMBER_OF_SRC_REG_TYPE = 4
+ SRC_REC_LITERAL = 4,
+ NUMBER_OF_SRC_REG_TYPE = 5
} SrcRegisterType;
typedef enum DstRegisterType
@@ -111,16 +151,24 @@ typedef struct PVSDSTtag
BITS addrmode1:1; //32
} PVSDST;
+typedef struct PVSINSTtag
+{
+ BITS literal_slots :2;
+ BITS SaturateMode :2;
+ BITS index_mode :3;
+} PVSINST;
+
typedef struct PVSSRCtag
{
- BITS rtype:4;
+ BITS rtype:3;
BITS addrmode0:1;
- BITS reg:10; //15 (8)
+ BITS reg:10; //14 (8)
BITS swizzlex:3;
BITS swizzley:3;
BITS swizzlez:3;
- BITS swizzlew:3; //27
+ BITS swizzlew:3; //26
+ BITS abs:1;
BITS negx:1;
BITS negy:1;
BITS negz:1;
@@ -148,6 +196,7 @@ typedef union PVSDWORDtag
{
BITS bits;
PVSDST dst;
+ PVSINST dst2;
PVSSRC src;
PVSMATH math;
float f;
@@ -251,6 +300,8 @@ enum
FC_IF = 1,
FC_LOOP = 2,
FC_REP = 3,
+ FC_PUSH_VPM = 4,
+ FC_PUSH_WQM = 5,
COND_NONE = 0,
COND_BOOL = 1,
@@ -263,22 +314,56 @@ enum
typedef struct FC_LEVEL
{
- unsigned int first; ///< first fc instruction on level (if, rep, loop)
- unsigned int* mid; ///< middle instructions - else or all breaks on this level
- unsigned int midLen;
- unsigned int type;
- unsigned int cond;
- unsigned int inv;
- unsigned int bpush; ///< 1 if first instruction does branch stack push
- int id; ///< id of bool or int variable
+ R700ControlFlowGenericClause * first;
+ R700ControlFlowGenericClause ** mid;
+ unsigned int unNumMid;
+ unsigned int midLen;
+ unsigned int type;
+ unsigned int cond;
+ unsigned int inv;
+ int id; ///< id of bool or int variable
} FC_LEVEL;
typedef struct VTX_FETCH_METHOD
{
- GLboolean bEnableMini;
- GLuint mega_fetch_remainder;
+ GLboolean bEnableMini;
+ GLuint mega_fetch_remainder;
} VTX_FETCH_METHOD;
+typedef struct SUB_OFFSET
+{
+ GLint subIL_Offset;
+ GLuint unCFoffset;
+ GLuint unStackDepthMax;
+ PRESUB_DESC * pPresubDesc;
+ TypedShaderList lstCFInstructions_local;
+} SUB_OFFSET;
+
+typedef struct CALLER_POINTER
+{
+ GLint subIL_Offset;
+ GLint subDescIndex;
+ R700ControlFlowGenericClause* cf_ptr;
+
+ R700ControlFlowGenericClause* prelude_cf_ptr;
+ R700ControlFlowGenericClause* finale_cf_ptr;
+} CALLER_POINTER;
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+
+typedef struct CALL_LEVEL
+{
+ unsigned int FCSP_BeforeEntry;
+ GLint subDescIndex;
+ GLushort current;
+ GLushort max;
+ TypedShaderList * plstCFInstructions_local;
+} CALL_LEVEL;
+
+#define HAS_CURRENT_LOOPRET 0x1L
+#define HAS_LOOPRET 0x2L
+#define LOOPRET_FLAGS HAS_LOOPRET | HAS_CURRENT_LOOPRET
+
typedef struct r700_AssemblerBase
{
R700ControlFlowSXClause* cf_last_export_ptr;
@@ -294,14 +379,19 @@ typedef struct r700_AssemblerBase
// No clause has been created yet
CF_CLAUSE_TYPE cf_current_clause_type;
+ BITS alu_x_opcode;
+
GLuint number_of_exports;
GLuint number_of_colorandz_exports;
GLuint number_of_export_opcodes;
PVSDWORD D;
+ PVSDWORD D2;
PVSDWORD S[3];
+ PVSDWORD C[4];
unsigned int uLastPosUpdate;
+ unsigned int last_cond_register;
OUT_FRAGMENT_FMT_0 fp_stOutFmt0;
@@ -310,6 +400,8 @@ typedef struct r700_AssemblerBase
unsigned int number_used_registers;
unsigned int uUsedConsts;
+ unsigned int flag_reg_index;
+
// Fragment programs
unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
@@ -330,9 +422,6 @@ typedef struct r700_AssemblerBase
unsigned int FCSP;
FC_LEVEL fc_stack[32];
- unsigned int branch_depth;
- unsigned int max_branch_depth;
-
//-----------------------------------------------------------------------------------
// ArgSubst used in Assemble_Source() function
//-----------------------------------------------------------------------------------
@@ -373,11 +462,33 @@ typedef struct r700_AssemblerBase
SHADER_PIPE_TYPE currentShaderType;
struct prog_instruction * pILInst;
GLuint uiCurInst;
+ GLubyte SamplerUnits[MAX_SAMPLERS];
GLboolean bR6xx;
/* helper to decide which type of instruction to assemble */
GLboolean is_tex;
/* we inserted helper intructions and need barrier on next TEX ins */
GLboolean need_tex_barrier;
+
+ SUB_OFFSET * subs;
+ GLuint unSubArraySize;
+ GLuint unSubArrayPointer;
+ CALLER_POINTER * callers;
+ GLuint unCallerArraySize;
+ GLuint unCallerArrayPointer;
+ unsigned int CALLSP;
+ CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH];
+
+ GLuint unCFflags;
+
+ PRESUB_DESC * presubs;
+ GLuint unPresubArraySize;
+ GLuint unNumPresub;
+ GLuint unCurNumILInsts;
+
+ GLuint unVetTexBits;
+
+ GLuint shadow_regs[R700_MAX_TEXTURE_UNITS];
+
} r700_AssemblerBase;
//Internal use
@@ -399,7 +510,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
GLboolean is_reduction_opcode(PVSDWORD * dest);
GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm);
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3);
GLboolean IsTex(gl_inst_opcode Opcode);
GLboolean IsAlu(gl_inst_opcode Opcode);
@@ -422,6 +533,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
GLubyte element,
GLuint _signed,
GLboolean normalize,
+ GLenum format,
VTX_FETCH_METHOD * pFetchMethod);
GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
GLuint gethelpr(r700_AssemblerBase* pAsm);
@@ -446,6 +558,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
R700ALUInstruction* alu_instruction_ptr,
GLuint contiguous_slots_needed);
+
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm);
+void add_return_inst(r700_AssemblerBase *pAsm);
+
void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
int source_index,
BITS* psrc_sel,
@@ -467,21 +583,31 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
R700ALUInstruction* alu_instruction_ptr);
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
GLboolean next_ins(r700_AssemblerBase *pAsm);
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops);
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset);
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue);
+GLboolean testFlag(r700_AssemblerBase *pAsm);
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP);
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF);
+
GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
GLboolean assemble_BAD(char *opcode_str);
GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
-GLboolean assemble_COS(r700_AssemblerBase *pAsm);
GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
GLboolean assemble_DST(r700_AssemblerBase *pAsm);
GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
-GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode);
GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
@@ -491,17 +617,37 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
GLboolean assemble_POW(r700_AssemblerBase *pAsm);
GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
-GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode);
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode);
+
GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
GLboolean assemble_STP(r700_AssemblerBase *pAsm);
GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
-GLboolean assemble_IF(r700_AssemblerBase *pAsm);
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse);
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm);
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm);
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
+GLboolean assemble_COND(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift);
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
+GLboolean assemble_RET(r700_AssemblerBase *pAsm);
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
+ GLint nILindex,
+ GLuint uiIL_Offest,
+ GLuint uiNumberInsts,
+ struct prog_instruction *pILInst,
+ PRESUB_DESC * pPresubDesc);
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
@@ -511,14 +657,25 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
BITS depth_channel_select);
+GLboolean callPreSub(r700_AssemblerBase* pAsm,
+ LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+ /* struct prog_instruction ** pILInstParent, */
+ COMPILED_SUB * pCompiledSub,
+ GLshort uOutReg,
+ GLshort uNumValidSrc);
//Interface
-GLboolean AssembleInstr(GLuint uiNumberInsts,
+GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiIL_Shift,
+ GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode);
GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg);
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
+
int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 3b7f6fffe0..3bc2d2ba02 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -45,6 +45,9 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ struct r700_vertex_program *vp = context->selected_vp;
+
struct radeon_bo *bo = NULL;
unsigned int i;
BATCH_LOCALS(&context->radeon);
@@ -52,13 +55,14 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
if (t) {
- if (!t->image_override)
+ if (!t->image_override) {
bo = t->mt->bo;
- else
+ } else {
bo = t->bo;
+ }
if (bo) {
r700SyncSurf(context, bo,
@@ -67,7 +71,16 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
- R600_OUT_BATCH(i * 7);
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 7);
+ }
+
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2);
@@ -77,7 +90,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
bo,
- 0,
+ r700->textures[i]->SQ_TEX_RESOURCE2,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
bo,
@@ -91,21 +104,35 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
}
}
+#define SAMPLER_STRIDE 3
+
static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
unsigned int i;
+
+ struct r700_vertex_program *vp = context->selected_vp;
+
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
radeonTexObj *t = r700->textures[i];
if (t) {
BEGIN_BATCH_NO_AUTOSTATE(5);
R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
- R600_OUT_BATCH(i * 3);
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * SAMPLER_STRIDE); //work 1
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 3);
+ }
+
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
@@ -141,77 +168,10 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom
}
}
+extern int getTypeSize(GLenum type);
static void r700SetupVTXConstants(GLcontext * ctx,
- unsigned int nStreamID,
void * pAos,
- unsigned int size, /* number of elements in vector */
- unsigned int stride,
- unsigned int count) /* number of vectors in stream */
-{
- context_t *context = R700_CONTEXT(ctx);
- struct radeon_aos * paos = (struct radeon_aos *)pAos;
- BATCH_LOCALS(&context->radeon);
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
-
- unsigned int uSQ_VTX_CONSTANT_WORD0_0;
- unsigned int uSQ_VTX_CONSTANT_WORD1_0;
- unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0;
- unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
- unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0;
-
- if (!paos->bo)
- return;
-
- if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
- (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
- (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
- (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
- (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
- r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
- else
- r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
-
- uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
- uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1;
-
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
- SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL),
- SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
- SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
- SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
- SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
- SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
-
- SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
- SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
- SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
-
- BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
- R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
- R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
- R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
- R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
- R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0);
- R600_OUT_BATCH(0);
- R600_OUT_BATCH(0);
- R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0);
- R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0,
- paos->bo,
- uSQ_VTX_CONSTANT_WORD0_0,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_BATCH();
- COMMIT_BATCH();
-
-}
-
-extern int getTypeSize(GLenum type);
-static void r700SetupVTXConstants2(GLcontext * ctx,
- void * pAos,
- StreamDesc * pStreamDesc)
+ StreamDesc * pStreamDesc)
{
context_t *context = R700_CONTEXT(ctx);
struct radeon_aos * paos = (struct radeon_aos *)pAos;
@@ -295,31 +255,6 @@ static void r700SetupVTXConstants2(GLcontext * ctx,
}
-void r700SetupStreams(GLcontext *ctx)
-{
- context_t *context = R700_CONTEXT(ctx);
- struct r700_vertex_program *vp = context->selected_vp;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- unsigned int i, j = 0;
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
-
- R600_STATECHANGE(context, vtx);
-
- for(i=0; i<VERT_ATTRIB_MAX; i++) {
- if(vp->mesa_program->Base.InputsRead & (1 << i)) {
- rcommon_emit_vector(ctx,
- &context->radeon.tcl.aos[j],
- vb->AttribPtr[i]->data,
- vb->AttribPtr[i]->size,
- vb->AttribPtr[i]->stride,
- vb->Count);
- j++;
- }
- }
- context->radeon.tcl.aos_count = j;
-}
-
static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
{
context_t *context = R700_CONTEXT(ctx);
@@ -343,25 +278,12 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
COMMIT_BATCH();
for(i=0; i<VERT_ATTRIB_MAX; i++) {
- if(vp->mesa_program->Base.InputsRead & (1 << i))
- {
- if(1 == context->selected_vp->uiVersion)
- {
- /* currently aos are packed */
- r700SetupVTXConstants(ctx,
- i,
- (void*)(&context->radeon.tcl.aos[j]),
- (unsigned int)context->radeon.tcl.aos[j].components,
- (unsigned int)context->radeon.tcl.aos[j].stride * 4,
- (unsigned int)context->radeon.tcl.aos[j].count);
- }
- else
- { /* context->selected_vp->uiVersion == 2 : aos not always packed */
- r700SetupVTXConstants2(ctx,
- (void*)(&context->radeon.tcl.aos[j]),
- &(context->stream_desc[j]));
- }
- j++;
+ if(vp->mesa_program->Base.InputsRead & (1 << i))
+ {
+ r700SetupVTXConstants(ctx,
+ (void*)(&context->radeon.tcl.aos[j]),
+ &(context->stream_desc[j]));
+ j++;
}
}
}
@@ -463,7 +385,6 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
rrb = radeon_get_depthbuffer(&context->radeon);
if (!rrb || !rrb->bo) {
- fprintf(stderr, "no rrb\n");
return;
}
@@ -505,7 +426,6 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
rrb = radeon_get_colorbuffer(&context->radeon);
if (!rrb || !rrb->bo) {
- fprintf(stderr, "no rrb\n");
return;
}
@@ -549,68 +469,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
{
- context_t *context = R700_CONTEXT(ctx);
- R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
- struct radeon_bo * pbo;
- BATCH_LOCALS(&context->radeon);
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
- pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
+ pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
- if (!pbo)
- return;
+ if (!pbo)
+ return;
- r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
- BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
- R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
- R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
- R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
- pbo,
- r700->ps.SQ_PGM_START_PS.u32All,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+ R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
+ R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
+ pbo,
+ r700->ps.SQ_PGM_START_PS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(9);
- R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
- R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
- R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
+ END_BATCH();
- COMMIT_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF);
+ END_BATCH();
+
+ COMMIT_BATCH();
}
static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
{
- context_t *context = R700_CONTEXT(ctx);
- R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
- struct radeon_bo * pbo;
- BATCH_LOCALS(&context->radeon);
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
- pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+ pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
- if (!pbo)
- return;
+ if (!pbo)
+ return;
- r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
- BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
- R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
- R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
- R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
- pbo,
- r700->vs.SQ_PGM_START_VS.u32All,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+ R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
+ R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
+ pbo,
+ r700->vs.SQ_PGM_START_VS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(6);
- R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
- R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
+ END_BATCH();
- COMMIT_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F);
+ //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F);
+ END_BATCH();
+
+ COMMIT_BATCH();
}
static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
@@ -891,8 +820,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
- BEGIN_BATCH_NO_AUTOSTATE(23);
- R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All);
+ BEGIN_BATCH_NO_AUTOSTATE(17);
R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
@@ -905,7 +833,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
- R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All);
R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
END_BATCH();
@@ -1205,6 +1132,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
COMMIT_BATCH();
}
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = radeon->query.current;
+ BATCH_LOCALS(radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ /* clear the buffer */
+ radeon_bo_map(query->bo, GL_FALSE);
+ memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+ radeon_bo_unmap(query->bo);
+
+ radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+ query->bo,
+ 0, RADEON_GEM_DOMAIN_GTT);
+
+ BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+ R600_OUT_BATCH(ZPASS_DONE);
+ R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+ R600_OUT_BATCH(0x00000000);
+ R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+ query->emitted_begin = GL_TRUE;
+}
+
static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
{
return atom->cmd_size;
@@ -1233,7 +1186,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
count += 3;
if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
- for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+ /* targets are enabled in r700SetRenderTarget but state
+ size is calculated before that. Until MRT's are done
+ hardcode target0 as enabled. */
+ count += 3;
+ for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) {
if (r700->render_target[ui].enabled)
count += 3;
}
@@ -1313,6 +1270,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
return count;
}
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ struct radeon_query_object *query = radeon->query.current;
+ int count;
+
+ if (!query || query->emitted_begin)
+ count = 0;
+ else
+ count = atom->cmd_size;
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count;
+}
+
#define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \
do { \
context->atoms.ATOM.cmd_size = (SZ); \
@@ -1326,6 +1297,19 @@ do { \
insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
} while (0)
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+ radeon->query.queryobj.cmd_size = (SZ);
+ radeon->query.queryobj.cmd = NULL;
+ radeon->query.queryobj.name = "queryobj";
+ radeon->query.queryobj.idx = 0;
+ radeon->query.queryobj.check = check_queryobj;
+ radeon->query.queryobj.dirty = GL_FALSE;
+ radeon->query.queryobj.emit = r700SendQueryBegin;
+ radeon->hw.max_state_size += (SZ);
+ insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
void r600InitAtoms(context_t *context)
{
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
@@ -1336,7 +1320,7 @@ void r600InitAtoms(context_t *context)
context->radeon.hw.atomlist.name = "atom-list";
ALLOC_STATE(sq, always, 34, r700SendSQConfig);
- ALLOC_STATE(db, always, 23, r700SendDBState);
+ ALLOC_STATE(db, always, 17, r700SendDBState);
ALLOC_STATE(stencil, always, 4, r700SendStencilState);
ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
ALLOC_STATE(sc, always, 15, r700SendSCState);
@@ -1349,22 +1333,23 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(poly, always, 10, r700SendPolyState);
ALLOC_STATE(cb, cb, 18, r700SendCBState);
ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+ ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
- ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
ALLOC_STATE(sx, always, 9, r700SendSXState);
ALLOC_STATE(vgt, always, 41, r700SendVGTState);
ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
ALLOC_STATE(vpt, always, 16, r700SendViewportState);
ALLOC_STATE(fs, always, 18, r700SendFSState);
- ALLOC_STATE(vs, always, 18, r700SendVSState);
- ALLOC_STATE(ps, always, 21, r700SendPSState);
+ ALLOC_STATE(vs, always, 21, r700SendVSState);
+ ALLOC_STATE(ps, always, 24, r700SendPSState);
ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+ r600_init_query_stateobj(&context->radeon, 6 * 2);
context->radeon.hw.is_dirty = GL_TRUE;
context->radeon.hw.all_dirty = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
index c6546ab00c..98bfdd0937 100644
--- a/src/mesa/drivers/dri/r600/r700_clear.c
+++ b/src/mesa/drivers/dri/r600/r700_clear.c
@@ -49,14 +49,18 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
void r700Clear(GLcontext * ctx, GLbitfield mask)
{
context_t *context = R700_CONTEXT(ctx);
- __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
- const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
+ __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
+ const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
GLbitfield swrast_mask = 0, tri_mask = 0;
int i;
struct gl_framebuffer *fb = ctx->DrawBuffer;
radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask);
+ if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
+ context->radeon.front_buffer_dirty = GL_TRUE;
+ }
+
if( GL_TRUE == r700ClearFast(context, mask) )
{
return;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 62a1ea1a22..84d51e6606 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -34,6 +34,7 @@
#include "main/imports.h"
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
+#include "shader/program.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
@@ -42,14 +43,68 @@
#include "r700_debug.h"
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
+{
+ static const gl_state_index winstate[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+ struct prog_instruction *newInst, *inst;
+ GLint win_size; /* state reference */
+ GLuint wpos_temp; /* temp register */
+ int i, j;
+
+ /* PARAM win_size = STATE_FB_SIZE */
+ win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
+
+ wpos_temp = fprog->Base.NumTemporaries++;
+
+ /* scan program where WPOS is used and replace with wpos_temp */
+ inst = fprog->Base.Instructions;
+ for (i = 0; i < fprog->Base.NumInstructions; i++) {
+ for (j=0; j < 3; j++) {
+ if(inst->SrcReg[j].File == PROGRAM_INPUT &&
+ inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
+ inst->SrcReg[j].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[j].Index = wpos_temp;
+ }
+ }
+ inst++;
+ }
+
+ _mesa_insert_instructions(&(fprog->Base), 0, 1);
+
+ newInst = fprog->Base.Instructions;
+ /* invert wpos.y
+ * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
+ newInst[0].Opcode = OPCODE_ADD;
+ newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+ newInst[0].DstReg.Index = wpos_temp;
+ newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+
+ newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+ newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ newInst[0].SrcReg[0].Negate = NEGATE_Y;
+
+ newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+ newInst[0].SrcReg[1].Index = win_size;
+ newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+
+}
+
//TODO : Validate FP input with VP output.
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
- struct gl_fragment_program *mesa_fp)
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx)
{
unsigned int unBit;
unsigned int i;
GLuint ui;
+ /* match fp inputs with vp exports. */
+ struct r700_vertex_program_cont *vpc =
+ (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+ GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
pAsm->number_used_registers = 0;
//Input mapping : mesa_fp->Base.InputsRead set the flag, set in
@@ -61,32 +116,99 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
}
- unBit = 1 << FRAG_ATTRIB_COL0;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
{
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
}
- unBit = 1 << FRAG_ATTRIB_COL1;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
{
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
}
- unBit = 1 << FRAG_ATTRIB_FOGC;
- if(mesa_fp->Base.InputsRead & unBit)
- {
- pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
- }
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
+ }
for(i=0; i<8; i++)
{
- unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
{
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
}
}
+
+/* order has been taken care of */
+#if 1
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
+ }
+ }
+#else
+ if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
+ {
+ struct r700_vertex_program_cont *vpc =
+ (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+ struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
+ struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
+ struct gl_program_parameter * pVsParam;
+ struct gl_program_parameter * pPsParam;
+ GLuint j, k;
+ GLuint unMaxVarying = 0;
+
+ for(i=0; i<VsVarying->NumParameters; i++)
+ {
+ pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
+ }
+
+ for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ j = i - FRAG_ATTRIB_VAR0;
+ pPsParam = PsVarying->Parameters + j;
+
+ for(k=0; k<VsVarying->NumParameters; k++)
+ {
+ pVsParam = VsVarying->Parameters + k;
+
+ if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
+ {
+ pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;
+ if(k > unMaxVarying)
+ {
+ unMaxVarying = k;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ pAsm->number_used_registers += unMaxVarying + 1;
+ }
+#endif
+ unBit = 1 << FRAG_ATTRIB_FACE;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
+ }
/* Map temporary registers (GPRs) */
pAsm->starting_temp_register_number = pAsm->number_used_registers;
@@ -127,6 +249,8 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->pucOutMask[ui] = 0x0;
}
+ pAsm->flag_reg_index = pAsm->number_used_registers++;
+
pAsm->uFirstHelpReg = pAsm->number_used_registers;
}
@@ -135,15 +259,19 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
{
GLuint i, j;
GLint * puiTEMPwrites;
+ GLint * puiTEMPreads;
struct prog_instruction * pILInst;
InstDeps *pInstDeps;
struct prog_instruction * texcoord_DepInst;
GLint nDepInstID;
puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+ puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+
for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
{
puiTEMPwrites[i] = -1;
+ puiTEMPreads[i] = -1;
}
pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
@@ -167,6 +295,11 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
{
//Set dep.
pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+ //Set first read
+ if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
+ {
+ puiTEMPreads[pILInst->SrcReg[j].Index] = i;
+ }
}
else
{
@@ -177,8 +310,6 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
fp->r700AsmCode.pInstDeps = pInstDeps;
- FREE(puiTEMPwrites);
-
//Find dep for tex inst
for(i=0; i<mesa_fp->Base.NumInstructions; i++)
{
@@ -203,29 +334,84 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
{ //... other deps?
}
}
+ // make sure that we dont overwrite src used earlier
+ nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
+ if(nDepInstID < i)
+ {
+ pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
+ texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+ if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[nDepInstID].nDstDep = i;
+ }
+
+ }
+
}
}
+ FREE(puiTEMPwrites);
+ FREE(puiTEMPreads);
+
return GL_TRUE;
}
GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
- struct gl_fragment_program *mesa_fp)
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx)
{
GLuint number_of_colors_exported;
GLboolean z_enabled = GL_FALSE;
- GLuint unBit;
+ GLuint unBit, shadow_unit;
+ int i;
+ struct prog_instruction *inst;
+ gl_state_index shadow_ambient[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
//Init_Program
Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
- Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
+
+ if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
+ {
+ insert_wpos_code(ctx, mesa_fp);
+ }
+
+ /* add/map consts for ARB_shadow_ambient */
+ if(mesa_fp->Base.ShadowSamplers)
+ {
+ inst = mesa_fp->Base.Instructions;
+ for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
+ {
+ if(inst->TexShadow == 1)
+ {
+ shadow_unit = inst->TexSrcUnit;
+ shadow_ambient[2] = shadow_unit;
+ fp->r700AsmCode.shadow_regs[shadow_unit] =
+ _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
+ }
+ inst++;
+ }
+ }
+
+ Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
{
return GL_FALSE;
}
+
+ InitShaderProgram(&(fp->r700AsmCode));
- if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
+ for(i=0; i < MAX_SAMPLERS; i++)
+ {
+ fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
+ }
+
+ fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
+
+ if( GL_FALSE == AssembleInstr(0,
+ 0,
+ mesa_fp->Base.NumInstructions,
&(mesa_fp->Base.Instructions[0]),
&(fp->r700AsmCode)) )
{
@@ -237,6 +423,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
return GL_FALSE;
}
+ if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
+ {
+ return GL_FALSE;
+ }
+
fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
: (fp->r700AsmCode.number_used_registers - 1);
@@ -251,7 +442,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
number_of_colors_exported--;
}
- fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+ /* illegal to set this to 0 */
+ if(number_of_colors_exported || z_enabled)
+ {
+ fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+ }
+ else
+ {
+ fp->r700Shader.exportMode = (1 << 1);
+ }
fp->translated = GL_TRUE;
@@ -269,7 +468,7 @@ void r700SelectFragmentShader(GLcontext *ctx)
}
if (GL_FALSE == fp->translated)
- r700TranslateFragmentShader(fp, &(fp->mesa_program));
+ r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx);
}
void * r700GetActiveFpShaderBo(GLcontext * ctx)
@@ -294,6 +493,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
unsigned int unNumOfReg;
unsigned int unBit;
GLuint exportCount;
+ GLboolean point_sprite = GL_FALSE;
if(GL_FALSE == fp->loaded)
{
@@ -347,6 +547,50 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
}
+ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
+ {
+ ui += 1;
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+ SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
+ SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
+ }
+ else
+ {
+ CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+ }
+
+ /* see if we need any point_sprite replacements */
+ for (i = VERT_RESULT_TEX0; i<= VERT_RESULT_TEX7; i++)
+ {
+ if(ctx->Point.CoordReplace[i - VERT_RESULT_TEX0] == GL_TRUE)
+ point_sprite = GL_TRUE;
+ }
+
+ if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
+ {
+ /* for FRAG_ATTRIB_PNTC we need to increase num_interp */
+ if(mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
+ {
+ ui++;
+ SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ }
+ SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
+ SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
+ if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+ SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ else
+ CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ }
+ else
+ {
+ CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ }
+
+
ui = (unNumOfReg < ui) ? ui : unNumOfReg;
SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
@@ -362,27 +606,14 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
EXPORT_MODE_shift, EXPORT_MODE_mask);
- R600_STATECHANGE(context, db);
-
- if(fp->r700Shader.killIsUsed)
- {
- SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
- }
- else
- {
- CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
- }
-
- if(fp->r700Shader.depthIsExported)
- {
- SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
- }
- else
- {
- CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
- }
-
// emit ps input map
+ struct r700_vertex_program_cont *vpc =
+ (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+ GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
+ for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+ r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
+
unBit = 1 << FRAG_ATTRIB_WPOS;
if(mesa_fp->Base.InputsRead & unBit)
{
@@ -396,8 +627,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
- unBit = 1 << FRAG_ATTRIB_COL0;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -409,8 +640,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
- unBit = 1 << FRAG_ATTRIB_COL1;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -422,8 +653,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
- unBit = 1 << FRAG_ATTRIB_FOGC;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -437,25 +668,79 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
for(i=0; i<8; i++)
{
- unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
SEMANTIC_shift, SEMANTIC_mask);
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ /* ARB_point_sprite */
+ if(ctx->Point.CoordReplace[i] == GL_TRUE)
+ {
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
}
}
- R600_STATECHANGE(context, cb);
+ unBit = 1 << FRAG_ATTRIB_FACE;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
+
+
+
+
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ }
+
exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
- r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
+ if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
+ {
+ R600_STATECHANGE(context, cb);
+ r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
+ }
/* sent out shader constants. */
paramList = fp->mesa_program.Base.Parameters;
- if(NULL != paramList) {
+ if(NULL != paramList)
+ {
_mesa_load_state_parameters(ctx, paramList);
if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
@@ -468,14 +753,33 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
}
} else
r700->ps.num_consts = 0;
+ COMPILED_SUB * pCompiledSub;
+ GLuint uj;
+ GLuint unConstOffset = r700->ps.num_consts;
+ for(ui=0; ui<pAsm->unNumPresub; ui++)
+ {
+ pCompiledSub = pAsm->presubs[ui].pCompiledSub;
+
+ r700->ps.num_consts += pCompiledSub->NumParameters;
+
+ for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+ {
+ r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+ r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+ r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+ r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+ }
+ unConstOffset += pCompiledSub->NumParameters;
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h
index cbb108d212..39c59c9201 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.h
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.h
@@ -48,13 +48,17 @@ struct r700_fragment_program
};
/* Internal */
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog);
+
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
- struct gl_fragment_program *mesa_fp);
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx);
GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
struct gl_fragment_program *mesa_fp);
GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
- struct gl_fragment_program *mesa_vp);
+ struct gl_fragment_program *mesa_vp,
+ GLcontext *ctx);
/* Interface */
extern void r700SelectFragmentShader(GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c
index 5290ef31be..0d476fcd86 100644
--- a/src/mesa/drivers/dri/r600/r700_oglprog.c
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.c
@@ -40,6 +40,24 @@
#include "r700_vertprog.h"
+static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache)
+{
+ struct r700_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ /* Release DMA region */
+ r600DeleteShader(ctx, vp->shaderbo);
+ /* Clean up */
+ Clean_Up_Assembler(&(vp->r700AsmCode));
+ Clean_Up_Shader(&(vp->r700Shader));
+
+ _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL);
+ _mesa_free(vp);
+ vp = tmp;
+ }
+}
+
static struct gl_program *r700NewProgram(GLcontext * ctx,
GLenum target,
GLuint id)
@@ -84,8 +102,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx,
static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
- struct r700_vertex_program_cont * vpc;
- struct r700_vertex_program *vp, *tmp;
+ struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
struct r700_fragment_program * fp;
radeon_print(RADEON_SHADER, RADEON_VERBOSE,
@@ -95,20 +112,7 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
- vpc = (struct r700_vertex_program_cont*)prog;
- vp = vpc->progs;
- while (vp) {
- tmp = vp->next;
- /* Release DMA region */
-
- r600DeleteShader(ctx, vp->shaderbo);
-
- /* Clean up */
- Clean_Up_Assembler(&(vp->r700AsmCode));
- Clean_Up_Shader(&(vp->r700Shader));
- _mesa_free(vp);
- vp = tmp;
- }
+ freeVertProgCache(ctx, vpc);
break;
case GL_FRAGMENT_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
@@ -131,7 +135,24 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
static void
r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
-
+ struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
+ struct r700_fragment_program * fp = (struct r700_fragment_program*)prog;
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vpc);
+ vpc->progs = NULL;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ r600DeleteShader(ctx, fp->shaderbo);
+ Clean_Up_Assembler(&(fp->r700AsmCode));
+ Clean_Up_Shader(&(fp->r700Shader));
+ fp->translated = GL_FALSE;
+ fp->loaded = GL_FALSE;
+ fp->shaderbo = NULL;
+ break;
+ }
+
}
static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 4f39d9f1bd..eab27cbd84 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -59,9 +59,7 @@
void r700WaitForIdle(context_t *context);
void r700WaitForIdleClean(context_t *context);
-GLboolean r700SendTextureState(context_t *context);
static unsigned int r700PrimitiveType(int prim);
-void r600UpdateTextureState(GLcontext * ctx);
GLboolean r700SyncSurf(context_t *context,
struct radeon_bo *pbo,
uint32_t read_domain,
@@ -253,24 +251,12 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
{
context_t *context = R700_CONTEXT(ctx);
BATCH_LOCALS(&context->radeon);
- int type, i, total_emit;
+ int type, total_emit;
int num_indices;
uint32_t vgt_draw_initiator = 0;
uint32_t vgt_index_type = 0;
uint32_t vgt_primitive_type = 0;
uint32_t vgt_num_indices = 0;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- GLboolean bUseDrawIndex;
-
- if(NULL != context->ind_buf.bo)
- {
- bUseDrawIndex = GL_TRUE;
- }
- else
- {
- bUseDrawIndex = GL_FALSE;
- }
type = r700PrimitiveType(prim);
num_indices = r700NumVerts(end - start, prim);
@@ -282,90 +268,153 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
if (type < 0 || num_indices <= 0)
return;
- if(GL_TRUE == bUseDrawIndex)
- {
- total_emit = 3 /* VGT_PRIMITIVE_TYPE */
- + 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
- + 5 + 2; /* DRAW_INDEX */
- }
- else
- {
- total_emit = 3 /* VGT_PRIMITIVE_TYPE */
- + 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
- + num_indices + 3; /* DRAW_INDEX_IMMD */
- }
-
- BEGIN_BATCH_NO_AUTOSTATE(total_emit);
- // prim
SETfield(vgt_primitive_type, type,
VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
- R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
- R600_OUT_BATCH(vgt_primitive_type);
- // index type
SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- if(GL_TRUE == bUseDrawIndex)
+ if(GL_TRUE != context->ind_buf.is_32bit)
{
- if(GL_TRUE != context->ind_buf.is_32bit)
- {
SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- }
}
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 5 + 2; /* DRAW_INDEX */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
R600_OUT_BATCH(vgt_index_type);
-
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
-
// draw packet
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+ R600_OUT_BATCH(context->ind_buf.bo_offset);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+ context->ind_buf.bo,
+ context->ind_buf.bo_offset,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, i;
+ uint32_t num_indices, total_emit = 0;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+ if (num_indices > 0xffff)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+ else
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+
vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
- if(GL_TRUE == bUseDrawIndex)
+ if (start == 0)
{
- SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
}
else
{
- SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ if (num_indices > 0xffff)
+ {
+ total_emit += num_indices;
+ }
+ else
+ {
+ total_emit += (num_indices + 1) / 2;
+ }
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
}
- SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+ total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 3; /* DRAW */
- if(GL_TRUE == bUseDrawIndex)
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ // draw packet
+ if(start == 0)
{
- R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
- R600_OUT_BATCH(context->ind_buf.bo_offset);
- R600_OUT_BATCH(0);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
R600_OUT_BATCH(vgt_num_indices);
R600_OUT_BATCH(vgt_draw_initiator);
- R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
- context->ind_buf.bo,
- context->ind_buf.bo_offset,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
}
else
{
- R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
- R600_OUT_BATCH(vgt_num_indices);
- R600_OUT_BATCH(vgt_draw_initiator);
-
- for (i = start; i < (start + num_indices); i++)
- {
- if(vb->Elts)
- {
- R600_OUT_BATCH(vb->Elts[i]);
- }
- else
+ if (num_indices > 0xffff)
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ for (i = start; i < (start + num_indices); i++)
{
- R600_OUT_BATCH(i);
+ R600_OUT_BATCH(i);
}
- }
+ }
+ else
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ for (i = start; i < (start + num_indices); i += 2)
+ {
+ if ((i + 1) == (start + num_indices))
+ {
+ R600_OUT_BATCH(i);
+ }
+ else
+ {
+ R600_OUT_BATCH(((i + 1) << 16) | (i));
+ }
+ }
+ }
}
END_BATCH();
@@ -375,173 +424,45 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
/* start 3d, idle, cb/db flush */
#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
-static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims)
+static GLuint r700PredictRenderSize(GLcontext* ctx,
+ const struct _mesa_prim *prim,
+ const struct _mesa_index_buffer *ib,
+ GLuint nr_prims)
{
context_t *context = R700_CONTEXT(ctx);
- struct r700_vertex_program *vp = context->selected_vp;
GLboolean flushed;
GLuint dwords, i;
GLuint state_size;
- /* pre calculate aos count so state prediction works */
- context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);
dwords = PRE_EMIT_STATE_BUFSZ;
- if (nr_prims)
+ if (ib)
dwords += nr_prims * 14;
else {
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
-
- for (i = 0; i < vb->PrimitiveCount; i++)
- dwords += vb->Primitive[i].count + 10;
+ for (i = 0; i < nr_prims; ++i)
+ {
+ if (prim[i].start == 0)
+ dwords += 10;
+ else if (prim[i].count > 0xffff)
+ dwords += prim[i].count + 10;
+ else
+ dwords += ((prim[i].count + 1) / 2) + 10;
+ }
}
+
state_size = radeonCountStateEmitSize(&context->radeon);
flushed = rcommonEnsureCmdBufSpace(&context->radeon,
- dwords + state_size, __FUNCTION__);
-
+ dwords + state_size,
+ __FUNCTION__);
if (flushed)
- dwords += radeonCountStateEmitSize(&context->radeon);
+ dwords += radeonCountStateEmitSize(&context->radeon);
else
- dwords += state_size;
+ dwords += state_size;
- radeon_print(RADEON_RENDER, RADEON_VERBOSE,
- "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
return dwords;
-}
-
-static GLboolean r700RunRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage)
-{
- context_t *context = R700_CONTEXT(ctx);
- radeonContextPtr radeon = &context->radeon;
- unsigned int i, id = 0;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- struct radeon_renderbuffer *rrb;
-
- radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
- __func__, context->radeon.cmdbuf.cs->cdw);
-
- /* always emit CB base to prevent
- * lock ups on some chips.
- */
- R600_STATECHANGE(context, cb_target);
- /* mark vtx as dirty since it changes per-draw */
- R600_STATECHANGE(context, vtx);
-
- r700SetScissor(context);
- r700SetupVertexProgram(ctx);
- r700SetupFragmentProgram(ctx);
- r600UpdateTextureState(ctx);
-
- GLuint emit_end = r700PredictRenderSize(ctx, 0)
- + context->radeon.cmdbuf.cs->cdw;
- r700SetupStreams(ctx);
-
- radeonEmitState(radeon);
-
- radeon_debug_add_indent();
- /* richard test code */
- for (i = 0; i < vb->PrimitiveCount; i++) {
- GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
- GLuint start = vb->Primitive[i].start;
- GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
- r700RunRenderPrimitive(ctx, start, end, prim);
- }
- radeon_debug_remove_indent();
-
- /* Flush render op cached for last several quads. */
- r700WaitForIdleClean(context);
-
- rrb = radeon_get_colorbuffer(&context->radeon);
- if (rrb && rrb->bo)
- r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
- CB_ACTION_ENA_bit | (1 << (id + 6)));
-
- rrb = radeon_get_depthbuffer(&context->radeon);
- if (rrb && rrb->bo)
- r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
- DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
-
- radeonReleaseArrays(ctx, ~0);
-
- radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
- __func__, context->radeon.cmdbuf.cs->cdw);
-
- if ( emit_end < context->radeon.cmdbuf.cs->cdw )
- WARN_ONCE("Rendering was %d commands larger than predicted size."
- " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
-
- return GL_FALSE;
-}
-
-static GLboolean r700RunNonTCLRender(GLcontext * ctx,
- struct tnl_pipeline_stage *stage) /* -------------------- */
-{
- GLboolean bRet = GL_TRUE;
-
- return bRet;
-}
-
-static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
- struct tnl_pipeline_stage *stage)
-{
- GLboolean bRet = GL_FALSE;
- /* TODO : sw fallback */
-
- /* Need shader bo's setup before bo check */
- r700UpdateShaders(ctx);
- /**
-
- * Ensure all enabled and complete textures are uploaded along with any buffers being used.
- */
- if(!r600ValidateBuffers(ctx))
- {
- return GL_TRUE;
- }
-
- bRet = r700RunRender(ctx, stage);
-
- return bRet;
- //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
- //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
}
-const struct tnl_pipeline_stage _r700_render_stage = {
- "r700 Hardware Rasterization",
- NULL,
- NULL,
- NULL,
- NULL,
- r700RunNonTCLRender
-};
-
-const struct tnl_pipeline_stage _r700_tcl_stage = {
- "r700 Hardware Transform, Clipping and Lighting",
- NULL,
- NULL,
- NULL,
- NULL,
- r700RunTCLRender
-};
-
-const struct tnl_pipeline_stage *r700_pipeline[] =
-{
- &_r700_tcl_stage,
- &_tnl_vertex_transform_stage,
- &_tnl_normal_transform_stage,
- &_tnl_lighting_stage,
- &_tnl_fog_coordinate_stage,
- &_tnl_texgen_stage,
- &_tnl_texture_transform_stage,
- &_tnl_vertex_program_stage,
-
- &_r700_render_stage,
- &_tnl_render_stage,
- 0,
-};
-
#define CONVERT( TYPE, MACRO ) do { \
GLuint i, j, sz; \
sz = input->Size; \
@@ -605,6 +526,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
sizeof(GLfloat) * input->Size * count, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
assert(src_ptr != NULL);
@@ -638,6 +562,8 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
break;
}
+ radeon_bo_unmap(attr->bo);
+
if (mapped_named_bo)
{
ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -656,6 +582,8 @@ static void r700AlignDataToDword(GLcontext *ctx,
radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+ radeon_bo_map(attr->bo, 1);
+
if (!input->BufferObj->Pointer)
{
ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
@@ -675,6 +603,7 @@ static void r700AlignDataToDword(GLcontext *ctx,
}
}
+ radeon_bo_unmap(attr->bo);
if (mapped_named_bo)
{
ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -683,7 +612,7 @@ static void r700AlignDataToDword(GLcontext *ctx,
attr->stride = dst_stride;
}
-static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count)
+static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count)
{
context_t *context = R700_CONTEXT(ctx);
GLuint stride;
@@ -743,32 +672,33 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu
radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
&context->stream_desc[index].bo_offset, size, 32);
+
+ radeon_bo_map(context->stream_desc[index].bo, 1);
assert(context->stream_desc[index].bo->ptr != NULL);
+
+
dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
context->stream_desc[index].bo_offset);
switch (context->stream_desc[index].dwords)
{
case 1:
- radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
- context->stream_desc[index].stride = 4;
+ radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
break;
case 2:
radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
- context->stream_desc[index].stride = 8;
break;
case 3:
radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
- context->stream_desc[index].stride = 12;
break;
case 4:
radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
- context->stream_desc[index].stride = 16;
break;
default:
assert(0);
break;
}
+ radeon_bo_unmap(context->stream_desc[index].bo);
}
}
@@ -786,7 +716,6 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu
}
}
- context->radeon.tcl.aos_count = context->nNumActiveAos;
ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
first_elem(&context->radeon.dma.reserved)->bo,
RADEON_GEM_DOMAIN_GTT, 0);
@@ -841,6 +770,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
&context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
assert(context->ind_buf.bo->ptr != NULL);
out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
@@ -854,6 +784,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
*out++ = in[i];
}
+ radeon_bo_unmap(context->ind_buf.bo);
#if MESA_BIG_ENDIAN
}
else
@@ -864,6 +795,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
&context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
assert(context->ind_buf.bo->ptr != NULL);
out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
@@ -876,6 +808,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
{
*out++ = in[i];
}
+ radeon_bo_unmap(context->ind_buf.bo);
#endif
}
@@ -921,11 +854,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
&context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
assert(context->ind_buf.bo->ptr != NULL);
dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
_mesa_memcpy(dst_ptr, src_ptr, size);
+ radeon_bo_unmap(context->ind_buf.bo);
context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
context->ind_buf.count = mesa_ind_buf->count;
@@ -941,12 +876,12 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
}
static GLboolean r700TryDrawPrims(GLcontext *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint min_index,
- GLuint max_index )
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
{
context_t *context = R700_CONTEXT(ctx);
radeonContextPtr radeon = &context->radeon;
@@ -954,14 +889,12 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
struct radeon_renderbuffer *rrb;
if (ctx->NewState)
- {
_mesa_update_state( ctx );
- }
_tnl_UpdateFixedFunctionProgram(ctx);
r700SetVertexFormat(ctx, arrays, max_index + 1);
/* shaders need to be updated before buffers are validated */
- r700UpdateShaders2(ctx);
+ r700UpdateShaders(ctx);
if (!r600ValidateBuffers(ctx))
return GL_FALSE;
@@ -975,23 +908,29 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
r700SetScissor(context);
r700SetupVertexProgram(ctx);
r700SetupFragmentProgram(ctx);
- r600UpdateTextureState(ctx);
+ r700UpdateShaderStates(ctx);
- GLuint emit_end = r700PredictRenderSize(ctx, nr_prims)
+ GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
+ context->radeon.cmdbuf.cs->cdw;
r700SetupIndexBuffer(ctx, ib);
- r700SetupStreams2(ctx, arrays, max_index + 1);
+ r700SetupStreams(ctx, arrays, max_index + 1);
radeonEmitState(radeon);
radeon_debug_add_indent();
for (i = 0; i < nr_prims; ++i)
{
- r700RunRenderPrimitive(ctx,
- prim[i].start,
- prim[i].start + prim[i].count,
- prim[i].mode);
+ if (context->ind_buf.bo)
+ r700RunRenderPrimitive(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
+ else
+ r700RunRenderPrimitiveImmediate(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
}
radeon_debug_remove_indent();
@@ -1019,18 +958,18 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
return GL_TRUE;
}
-static void r700DrawPrimsRe(GLcontext *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
+static void r700DrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
{
- GLboolean retval = GL_FALSE;
+ GLboolean retval = GL_FALSE;
- /* This check should get folded into just the places that
+ /* This check should get folded into just the places that
* min/max index are really needed.
*/
if (!index_bounds_valid) {
@@ -1038,7 +977,7 @@ static void r700DrawPrimsRe(GLcontext *ctx,
}
if (min_index) {
- vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe );
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
return;
}
@@ -1050,30 +989,6 @@ static void r700DrawPrimsRe(GLcontext *ctx,
_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
-static void r700DrawPrims(GLcontext *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
-{
- context_t *context = R700_CONTEXT(ctx);
-
- /* For non indexed drawing, using tnl pipe. */
- if(!ib)
- {
- context->ind_buf.bo = NULL;
-
- _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib,
- index_bounds_valid, min_index, max_index);
- return;
- }
-
- r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index);
-}
-
void r700InitDraw(GLcontext *ctx)
{
struct vbo_context *vbo = vbo_context(ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c
index 955ea4e4e1..2eed1acc2f 100644
--- a/src/mesa/drivers/dri/r600/r700_shader.c
+++ b/src/mesa/drivers/dri/r600/r700_shader.c
@@ -159,13 +159,18 @@ void Init_R700_Shader(R700_Shader * pShader)
pShader->lstVTXInstructions.uNumOfNode=0;
}
+void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF)
+{
+ pShader->plstCFInstructions_active = plstCF;
+}
+
void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
{
R700ControlFlowSXClause* pSXClause;
R700ControlFlowSMXClause* pSMXClause;
- pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode;
- AddInstToList(&(pShader->lstCFInstructions),
+ pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode;
+ AddInstToList(pShader->plstCFInstructions_active,
(R700ShaderInstruction*)pCFInst);
pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);
diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h
index 997cb05aaf..0599ffd901 100644
--- a/src/mesa/drivers/dri/r600/r700_shader.h
+++ b/src/mesa/drivers/dri/r600/r700_shader.h
@@ -109,6 +109,7 @@ typedef struct R700_Shader
GLuint uStackSize;
GLuint uMaxCallDepth;
+ TypedShaderList * plstCFInstructions_active;
TypedShaderList lstCFInstructions;
TypedShaderList lstALUInstructions;
TypedShaderList lstTEXInstructions;
@@ -128,16 +129,17 @@ typedef struct R700_Shader
//Internal
void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
+void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
void ResolveLinks(R700_Shader *pShader);
void Assemble(R700_Shader *pShader);
-
//Interface
void Init_R700_Shader(R700_Shader * pShader);
void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst);
void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst);
void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst);
void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst);
+void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF);
void LoadProgram(R700_Shader *pShader);
void UpdateShaderRegisters(R700_Shader *pShader);
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h
index 2829cca0a3..cdb9a570f7 100644
--- a/src/mesa/drivers/dri/r600/r700_shaderinst.h
+++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h
@@ -42,6 +42,13 @@
#define SQ_FETCH_RESOURCE_VS_OFFSET 0x000000a0
#define SQ_FETCH_RESOURCE_VS_COUNT 0x000000b0
+//richard dec.10 glsl
+#define SQ_TEX_SAMPLER_PS_OFFSET 0x00000000
+#define SQ_TEX_SAMPLER_PS_COUNT 0x00000012
+#define SQ_TEX_SAMPLER_VS_OFFSET 0x00000012
+#define SQ_TEX_SAMPLER_VS_COUNT 0x00000012
+//-------------------
+
#define SHADERINST_TYPEMASK_CF 0x10
#define SHADERINST_TYPEMASK_ALU 0x20
#define SHADERINST_TYPEMASK_TEX 0x40
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 7e8b48f91e..3c8cb579f9 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -46,7 +46,6 @@
#include "shader/prog_parameter.h"
#include "shader/prog_statevars.h"
#include "vbo/vbo.h"
-#include "main/texformat.h"
#include "r600_context.h"
@@ -55,18 +54,15 @@
#include "r700_fragprog.h"
#include "r700_vertprog.h"
-
+void r600UpdateTextureState(GLcontext * ctx);
static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
static void r700UpdatePolygonMode(GLcontext * ctx);
static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state);
static void r700SetStencilState(GLcontext * ctx, GLboolean state);
-void r700UpdateShaders (GLcontext * ctx) //----------------------------------
+void r700UpdateShaders(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
- GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- int i;
/* should only happenen once, just after context is created */
/* TODO: shouldn't we fallback to sw here? */
@@ -77,40 +73,7 @@ void r700UpdateShaders (GLcontext * ctx) //----------------------------------
r700SelectFragmentShader(ctx);
- if (context->radeon.NewGLState) {
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- /* mat states from state var not array for sw */
- dummy_attrib[i].stride = 0;
- temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i];
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]);
- }
-
- _tnl_UpdateFixedFunctionProgram(ctx);
-
- for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
- TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i];
- }
- }
-
- r700SelectVertexShader(ctx, 1);
- r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
- context->radeon.NewGLState = 0;
-}
-
-void r700UpdateShaders2(GLcontext * ctx)
-{
- context_t *context = R700_CONTEXT(ctx);
-
- /* should only happenen once, just after context is created */
- /* TODO: shouldn't we fallback to sw here? */
- if (!ctx->FragmentProgram._Current) {
- _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
- return;
- }
-
- r700SelectFragmentShader(ctx);
-
- r700SelectVertexShader(ctx, 2);
+ r700SelectVertexShader(ctx);
r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
context->radeon.NewGLState = 0;
}
@@ -122,7 +85,7 @@ void r700UpdateViewportOffset(GLcontext * ctx) //------------------
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
- __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
+ __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
GLfloat xoffset = (GLfloat) dPriv->x;
GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -228,6 +191,67 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-----------
context->radeon.NewGLState |= new_state;
}
+static void r700SetDBRenderState(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+
+ R600_STATECHANGE(context, db);
+
+ SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
+ SETfield(r700->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z, Z_ORDER_shift, Z_ORDER_mask);
+ /* XXX need to enable htile for hiz/s */
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
+ SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+
+ if (context->radeon.query.current)
+ {
+ SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+ if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+ {
+ SETbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit);
+ }
+ }
+ else
+ {
+ CLEARbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+ if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+ {
+ CLEARbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit);
+ }
+ }
+
+ if (fp)
+ {
+ if (fp->r700Shader.killIsUsed)
+ {
+ SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+
+ if (fp->r700Shader.depthIsExported)
+ {
+ SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ }
+}
+
+void r700UpdateShaderStates(GLcontext * ctx)
+{
+ r700SetDBRenderState(ctx);
+ r600UpdateTextureState(ctx);
+}
+
static void r700SetDepthState(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
@@ -681,6 +705,10 @@ static void r700UpdateCulling(GLcontext * ctx)
CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
break;
}
+
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ r700->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit;
}
static void r700UpdateLineStipple(GLcontext * ctx)
@@ -1043,7 +1071,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
{
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
- __DRIdrawablePrivate *dPriv = radeon_get_drawable(&context->radeon);
+ __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
const GLfloat *v = ctx->Viewport._WindowMap.m;
@@ -1067,6 +1095,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
GLfloat tz = v[MAT_TZ] * depthScale;
R600_STATECHANGE(context, vpt);
+ R600_STATECHANGE(context, cl);
r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx;
r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
@@ -1077,6 +1106,18 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //--------------------
r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz;
r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz;
+ if (ctx->Transform.DepthClamp) {
+ r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+ SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+ } else {
+ r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0;
+ r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0;
+ CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+ CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+ }
+
r700->viewport[id].enabled = GL_TRUE;
r700SetScissor(context);
@@ -1190,13 +1231,8 @@ static void r700UpdatePolygonMode(GLcontext * ctx)
/* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
* correctly by selecting the correct front and back face
*/
- if (ctx->Polygon.FrontFace == GL_CCW) {
- f = ctx->Polygon.FrontMode;
- b = ctx->Polygon.BackMode;
- } else {
- f = ctx->Polygon.BackMode;
- b = ctx->Polygon.FrontMode;
- }
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
/* Enable polygon mode */
SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
@@ -1295,11 +1331,15 @@ void r700SetScissor(context_t *context) //---------------
return;
}
if (context->radeon.state.scissor.enabled) {
- /* r600 has exclusive scissors */
x1 = context->radeon.state.scissor.rect.x1;
y1 = context->radeon.state.scissor.rect.y1;
- x2 = context->radeon.state.scissor.rect.x2 + 1;
- y2 = context->radeon.state.scissor.rect.y2 + 1;
+ x2 = context->radeon.state.scissor.rect.x2;
+ y2 = context->radeon.state.scissor.rect.y2;
+ /* r600 has exclusive BR scissors */
+ if (context->radeon.radeonScreen->kernel_mm) {
+ x2++;
+ y2++;
+ }
} else {
if (context->radeon.radeonScreen->driScreen->dri2.enabled) {
x1 = 0;
@@ -1378,8 +1418,6 @@ void r700SetScissor(context_t *context) //---------------
SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2,
PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
- r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All = 0;
- r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000;
r700->viewport[id].enabled = GL_TRUE;
}
@@ -1686,27 +1724,18 @@ void r700InitState(GLcontext * ctx) //-------------------
r700InitSQConfig(ctx);
r700ColorMask(ctx,
- ctx->Color.ColorMask[RCOMP],
- ctx->Color.ColorMask[GCOMP],
- ctx->Color.ColorMask[BCOMP],
- ctx->Color.ColorMask[ACOMP]);
+ ctx->Color.ColorMask[0][RCOMP],
+ ctx->Color.ColorMask[0][GCOMP],
+ ctx->Color.ColorMask[0][BCOMP],
+ ctx->Color.ColorMask[0][ACOMP]);
r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
r700DepthMask(ctx, ctx->Depth.Mask);
r700DepthFunc(ctx, ctx->Depth.Func);
- SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
-
r700->DB_DEPTH_CLEAR.u32All = 0x3F800000;
-
- r700->DB_RENDER_CONTROL.u32All = 0;
SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit);
SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit);
- r700->DB_RENDER_OVERRIDE.u32All = 0;
- if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
- SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit);
- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+ r700SetDBRenderState(ctx);
r700->DB_ALPHA_TO_MASK.u32All = 0;
SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h
index 209189d8d7..60c6a7f23c 100644
--- a/src/mesa/drivers/dri/r600/r700_state.h
+++ b/src/mesa/drivers/dri/r600/r700_state.h
@@ -35,7 +35,7 @@
extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state);
extern void r700UpdateShaders (GLcontext * ctx);
-extern void r700UpdateShaders2(GLcontext * ctx);
+extern void r700UpdateShaderStates(GLcontext * ctx);
extern void r700UpdateViewportOffset(GLcontext * ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index d12c39c9f7..782f151f5a 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -111,6 +111,15 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
}
}
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[i] = unTotal++;
+ }
+ }
+
return (unTotal - unStart);
}
@@ -179,7 +188,8 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions2(
context->stream_desc[i].size,
context->stream_desc[i].element,
context->stream_desc[i]._signed,
- context->stream_desc[i].normalize,
+ context->stream_desc[i].normalize,
+ context->stream_desc[i].format,
&vtxFetchMethod);
}
@@ -203,22 +213,11 @@ void Map_Vertex_Program(GLcontext *ctx,
pAsm->number_used_registers += num_inputs;
// Create VFETCH instructions for inputs
- if(1 == vp->uiVersion)
- {
- if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
- {
- radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
- return;
- }
- }
- else
- {
- if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
- {
- radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
- return;
- }
- }
+ if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
+ {
+ radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
+ return;
+ }
// Map Outputs
pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
@@ -228,7 +227,7 @@ void Map_Vertex_Program(GLcontext *ctx,
pAsm->number_used_registers += pAsm->number_of_exports;
pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
-
+
for(ui=0; ui<pAsm->number_of_exports; ui++)
{
pAsm->pucOutMask[ui] = 0x0;
@@ -245,7 +244,9 @@ void Map_Vertex_Program(GLcontext *ctx,
{ /* fix func t_vp uses NumTemporaries */
pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
}
-
+
+ pAsm->flag_reg_index = pAsm->number_used_registers++;
+
pAsm->uFirstHelpReg = pAsm->number_used_registers;
}
@@ -300,18 +301,13 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
}
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
- struct gl_vertex_program *mesa_vp,
- GLint nVer)
+ struct gl_vertex_program *mesa_vp)
{
context_t *context = R700_CONTEXT(ctx);
struct r700_vertex_program *vp;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- unsigned int unBit;
unsigned int i;
vp = _mesa_calloc(sizeof(*vp));
- vp->uiVersion = nVer;
vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);
if (mesa_vp->IsPositionInvariant)
@@ -319,29 +315,14 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, vp->mesa_program);
}
- if( 1 == nVer )
+ for(i=0; i<context->nNumActiveAos; i++)
{
- for(i=0; i<VERT_ATTRIB_MAX; i++)
- {
- unBit = 1 << i;
- if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
- {
- vp->aos_desc[i].size = vb->AttribPtr[i]->size;
- vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
- vp->aos_desc[i].type = GL_FLOAT;
- }
- }
+ vp->aos_desc[i].size = context->stream_desc[i].size;
+ vp->aos_desc[i].stride = context->stream_desc[i].stride;
+ vp->aos_desc[i].type = context->stream_desc[i].type;
+ vp->aos_desc[i].format = context->stream_desc[i].format;
}
- else
- {
- for(i=0; i<context->nNumActiveAos; i++)
- {
- vp->aos_desc[i].size = context->stream_desc[i].size;
- vp->aos_desc[i].stride = context->stream_desc[i].stride;
- vp->aos_desc[i].type = context->stream_desc[i].type;
- }
- }
-
+
if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
{
vp->r700AsmCode.bR6xx = 1;
@@ -354,20 +335,36 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
{
return NULL;
+ }
+
+ InitShaderProgram(&(vp->r700AsmCode));
+
+ for(i=0; i < MAX_SAMPLERS; i++)
+ {
+ vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
}
- if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
- &(vp->mesa_program->Base.Instructions[0]),
+ vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
+
+ if(GL_FALSE == AssembleInstr(0,
+ 0,
+ vp->mesa_program->Base.NumInstructions,
+ &(vp->mesa_program->Base.Instructions[0]),
&(vp->r700AsmCode)) )
{
return NULL;
- }
+ }
if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
{
return NULL;
}
+ if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
+ {
+ return GL_FALSE;
+ }
+
vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
: (vp->r700AsmCode.number_used_registers - 1);
@@ -378,14 +375,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
return vp;
}
-void r700SelectVertexShader(GLcontext *ctx, GLint nVersion)
+void r700SelectVertexShader(GLcontext *ctx)
{
context_t *context = R700_CONTEXT(ctx);
struct r700_vertex_program_cont *vpc;
struct r700_vertex_program *vp;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
- unsigned int unBit;
unsigned int i;
GLboolean match;
GLbitfield InputsRead;
@@ -396,47 +390,28 @@ void r700SelectVertexShader(GLcontext *ctx, GLint nVersion)
if (vpc->mesa_program.IsPositionInvariant)
{
InputsRead |= VERT_BIT_POS;
- }
-
+ }
+
for (vp = vpc->progs; vp; vp = vp->next)
{
- if (vp->uiVersion != nVersion )
- continue;
- match = GL_TRUE;
- if ( 1 == nVersion )
+ match = GL_TRUE;
+ for(i=0; i<context->nNumActiveAos; i++)
{
- for(i=0; i<VERT_ATTRIB_MAX; i++)
- {
- unBit = 1 << i;
- if(InputsRead & unBit)
+ if (vp->aos_desc[i].size != context->stream_desc[i].size ||
+ vp->aos_desc[i].format != context->stream_desc[i].format)
{
- if (vp->aos_desc[i].size != vb->AttribPtr[i]->size)
- {
match = GL_FALSE;
break;
- }
}
- }
}
- else
- {
- for(i=0; i<context->nNumActiveAos; i++)
- {
- if (vp->aos_desc[i].size != context->stream_desc[i].size)
- {
- match = GL_FALSE;
- break;
- }
- }
- }
- if (match)
+ if (match)
{
context->selected_vp = vp;
return;
}
}
- vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion);
+ vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
if(!vp)
{
radeon_error("Failed to translate vertex shader. \n");
@@ -526,6 +501,7 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s
pStreamDesc->size = input->Size;
pStreamDesc->dst_loc = context->nNumActiveAos;
pStreamDesc->element = unLoc;
+ pStreamDesc->format = input->Format;
switch (pStreamDesc->type)
{ //GetSurfaceFormat
@@ -570,6 +546,11 @@ void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[],
unsigned int unBit = mesa_vp->Base.InputsRead;
context->nNumActiveAos = 0;
+ if (mesa_vp->IsPositionInvariant)
+ {
+ unBit |= VERT_BIT_POS;
+ }
+
while(unBit)
{
if(unBit & 1)
@@ -580,6 +561,7 @@ void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[],
unBit >>= 1;
++unLoc;
}
+ context->radeon.tcl.aos_count = context->nNumActiveAos;
}
void * r700GetActiveVpShaderBo(GLcontext * ctx)
@@ -661,6 +643,12 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
paramList = vp->mesa_program->Base.Parameters;
if(NULL != paramList) {
+ /* vp->mesa_program was cloned, not updated by glsl shader api. */
+ /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */
+ /* so, use ctx->VertexProgem._Current */
+ struct gl_program_parameter_list *paramListOrginal =
+ paramListOrginal = ctx->VertexProgram._Current->Base.Parameters;
+
_mesa_load_state_parameters(ctx, paramList);
if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
@@ -673,13 +661,42 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
+ {
+ r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
+ r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
+ r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
+ r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ }
+ else
+ {
+ r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ }
}
} else
r700->vs.num_consts = 0;
+ COMPILED_SUB * pCompiledSub;
+ GLuint uj;
+ GLuint unConstOffset = r700->vs.num_consts;
+ for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
+ {
+ pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
+
+ r700->vs.num_consts += pCompiledSub->NumParameters;
+
+ for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+ {
+ r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+ r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+ r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+ r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+ }
+ unConstOffset += pCompiledSub->NumParameters;
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h
index f9a3e395ee..645c9ac84a 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.h
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.h
@@ -39,6 +39,7 @@ typedef struct ArrayDesc //TEMP
GLint size; //number of data element
GLenum type; //data element type
GLsizei stride;
+ GLenum format; //GL_RGBA or GL_BGRA
} ArrayDesc;
struct r700_vertex_program
@@ -52,8 +53,7 @@ struct r700_vertex_program
GLboolean translated;
GLboolean loaded;
- GLint uiVersion;
-
+
void * shaderbo;
ArrayDesc aos_desc[VERT_ATTRIB_MAX];
@@ -87,11 +87,10 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
- struct gl_vertex_program *mesa_vp,
- GLint nVer);
+ struct gl_vertex_program *mesa_vp);
/* Interface */
-extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion);
+extern void r700SelectVertexShader(GLcontext *ctx);
extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count);
extern GLboolean r700SetupVertexProgram(GLcontext * ctx);
diff --git a/src/mesa/drivers/dri/r600/radeon_bo.c b/src/mesa/drivers/dri/r600/radeon_bo.c
new file mode 120000
index 0000000000..9448ffee54
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo.c
@@ -0,0 +1 @@
+../radeon/radeon_bo.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h
new file mode 120000
index 0000000000..029450928b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_int_drm.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs.c b/src/mesa/drivers/dri/r600/radeon_cs.c
new file mode 120000
index 0000000000..66b7ad1eb0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs.c
@@ -0,0 +1 @@
+../radeon/radeon_cs.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h
new file mode 120000
index 0000000000..462f5245d0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_int_drm.h \ No newline at end of file