From e2dcebd2e6b2af6269a5ece6d6ced73ec8bb4a47 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Fri, 8 May 2009 19:23:45 -0400 Subject: R6xx/R7xx: WIP r6xx-rewrite code --- src/mesa/drivers/dri/r600/Makefile | 3 + src/mesa/drivers/dri/r600/r600_cmdbuf.c | 589 +++++++++++++++++++++++++++++- src/mesa/drivers/dri/r600/r600_cmdbuf.h | 167 +++++++++ src/mesa/drivers/dri/r600/r600_context.c | 69 +--- src/mesa/drivers/dri/r600/r600_context.h | 39 +- src/mesa/drivers/dri/r600/r600_emit.c | 224 +++++++++++- src/mesa/drivers/dri/r600/r600_emit.h | 257 +++---------- src/mesa/drivers/dri/r600/r700_chip.c | 248 ++++++++++++- src/mesa/drivers/dri/r600/r700_chip.h | 11 +- src/mesa/drivers/dri/r600/r700_clear.c | 117 +++--- src/mesa/drivers/dri/r600/r700_fragprog.c | 50 +-- src/mesa/drivers/dri/r600/r700_fragprog.h | 8 +- src/mesa/drivers/dri/r600/r700_ioctl.c | 28 +- src/mesa/drivers/dri/r600/r700_oglprog.c | 26 +- src/mesa/drivers/dri/r600/r700_render.c | 217 +++++------ src/mesa/drivers/dri/r600/r700_state.c | 265 ++++++++------ src/mesa/drivers/dri/r600/r700_state.h | 1 + src/mesa/drivers/dri/r600/r700_tex.c | 24 +- src/mesa/drivers/dri/r600/r700_tex.h | 2 + src/mesa/drivers/dri/r600/r700_vertprog.c | 80 ++-- src/mesa/drivers/dri/r600/r700_vertprog.h | 10 +- 21 files changed, 1697 insertions(+), 738 deletions(-) (limited to 'src/mesa/drivers/dri/r600') diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 00c56c03cd..474a3167b4 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -48,9 +48,12 @@ DRIVER_SOURCES = \ r700_fragprog.c \ r700_vertprog.c \ r700_shader.c \ + r700_shaderinst.c \ r700_ioctl.c \ r700_oglprog.c \ r700_chip.c \ + r700_state.c \ + r700_clear.c \ r700_render.c \ r700_tex.c \ r700_debug.c \ diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index bc14f9a755..8550457566 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -28,11 +28,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ /** - * \file - * - * \author Nicolai Haehnle + * Mostly coppied from \radeon\radeon_cs_legacy.c */ +#include + #include "main/glheader.h" #include "main/state.h" #include "main/imports.h" @@ -52,11 +52,582 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_mipmap_tree.h" #include "radeon_reg.h" -/** - * Allocate memory for the command buffer and initialize the state atom - * list. Note that the initial hardware state is set by r600InitState(). - */ -void r600InitCmdBuf(r600ContextPtr r600) +struct r600_cs_manager_legacy { - rcommonInitCmdBuf(&r600->radeon); + struct radeon_cs_manager base; + struct radeon_context *ctx; + /* hack for scratch stuff */ + uint32_t pending_age; + uint32_t pending_count; +}; + +struct r600_cs_reloc_legacy { + struct radeon_cs_reloc base; + uint32_t cindices; + uint32_t *indices; + uint32_t *reloc_indices; + struct offset_modifiers offset_mod; +}; + +static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm, + uint32_t ndw) +{ + struct radeon_cs *cs; + + cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs)); + if (cs == NULL) { + return NULL; + } + cs->csm = csm; + cs->ndw = (ndw + 0x3FF) & (~0x3FF); + cs->packets = (uint32_t*)malloc(4*cs->ndw); + if (cs->packets == NULL) { + free(cs); + return NULL; + } + cs->relocs_total_size = 0; + return cs; +} + +int r600_cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags, + offset_modifiers* poffset_mod) +{ + struct r600_cs_reloc_legacy *relocs; + int i; + + relocs = (struct r600_cs_reloc_legacy *)cs->relocs; + /* check domains */ + if ((read_domain && write_domain) || (!read_domain && !write_domain)) { + /* in one CS a bo can only be in read or write domain but not + * in read & write domain at the same sime + */ + return -EINVAL; + } + if (read_domain == RADEON_GEM_DOMAIN_CPU) { + return -EINVAL; + } + if (write_domain == RADEON_GEM_DOMAIN_CPU) { + return -EINVAL; + } + /* check if bo is already referenced */ + for(i = 0; i < cs->crelocs; i++) { + uint32_t *indices; + uint32_t *reloc_indices; + + if (relocs[i].base.bo->handle == bo->handle) { + /* Check domains must be in read or write. As we check already + * checked that in argument one of the read or write domain was + * set we only need to check that if previous reloc as the read + * domain set then the read_domain should also be set for this + * new relocation. + */ + if (relocs[i].base.read_domain && !read_domain) { + return -EINVAL; + } + if (relocs[i].base.write_domain && !write_domain) { + return -EINVAL; + } + relocs[i].base.read_domain |= read_domain; + relocs[i].base.write_domain |= write_domain; + /* save indice */ + relocs[i].cindices++; + indices = (uint32_t*)realloc(relocs[i].indices, + relocs[i].cindices * 4); + reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices, + relocs[i].cindices * 4); + if ( (indices == NULL) || (reloc_indices == NULL) ) { + relocs[i].cindices -= 1; + return -ENOMEM; + } + relocs[i].indices = indices; + relocs[i].reloc_indices = reloc_indices; + relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1; + relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->section_cdw; + cs->section_ndw += 2; + cs->section_cdw += 2; + + relocs[i].offset_mod.shift = poffset_mod->shift; + relocs[i].offset_mod.shiftbits = poffset_mod->shiftbits; + relocs[i].offset_mod.mask = poffset_mod->mask; + + return 0; + } + } + /* add bo to reloc */ + relocs = (struct r600_cs_reloc_legacy*) + realloc(cs->relocs, + sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1)); + if (relocs == NULL) { + return -ENOMEM; + } + cs->relocs = relocs; + relocs[cs->crelocs].base.bo = bo; + relocs[cs->crelocs].base.read_domain = read_domain; + relocs[cs->crelocs].base.write_domain = write_domain; + relocs[cs->crelocs].base.flags = flags; + relocs[cs->crelocs].indices = (uint32_t*)malloc(4); + relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4); + if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) ) + { + return -ENOMEM; + } + relocs[cs->crelocs].offset_mod.shift = poffset_mod->shift; + relocs[cs->crelocs].offset_mod.shiftbits = poffset_mod->shiftbits; + relocs[cs->crelocs].offset_mod.mask = poffset_mod->mask; + + relocs[cs->crelocs].indices[0] = cs->cdw - 1; + relocs[cs->crelocs].reloc_indices[0] = cs->section_cdw; + cs->section_ndw += 2; + cs->section_cdw += 2; + relocs[cs->crelocs].cindices = 1; + cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); + cs->crelocs++; + + radeon_bo_ref(bo); + return 0; +} + +static int r600_cs_begin(struct radeon_cs *cs, + uint32_t ndw, + const char *file, + const char *func, + int line) +{ + if (cs->section) { + fprintf(stderr, "CS already in a section(%s,%s,%d)\n", + cs->section_file, cs->section_func, cs->section_line); + fprintf(stderr, "CS can't start section(%s,%s,%d)\n", + file, func, line); + return -EPIPE; + } + + if (cs->cdw + ndw + 32 > cs->ndw) { /* Left 32 DWORD (8 offset+pitch) spare room for reloc indices */ + uint32_t tmp, *ptr; + int num = (ndw > 0x3FF) ? ndw : 0x3FF; + + tmp = (cs->cdw + 1 + num) & (~num); + ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); + if (ptr == NULL) { + return -ENOMEM; + } + cs->packets = ptr; + cs->ndw = tmp; + } + + cs->section = 1; + cs->section_ndw = 0; + cs->section_cdw = cs->cdw + ndw; /* start of reloc indices. */ + cs->section_file = file; + cs->section_func = func; + cs->section_line = line; + + return 0; +} + +static int r600_cs_end(struct radeon_cs *cs, + const char *file, + const char *func, + int line) + +{ + if (!cs->section) { + fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", + file, func, line); + return -EPIPE; + } + cs->section = 0; + + if ( (cs->section_ndw + cs->cdw) != cs->section_cdw ) + { + fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", + cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); + fprintf(stderr, "CS section end at (%s,%s,%d)\n", + file, func, line); + return -EPIPE; + } + + cs->cdw = cs->section_cdw; + return 0; +} + +static int r600_cs_process_relocs(struct radeon_cs *cs, + uint32_t * reloc_chunk, + uint32_t * length_dw_reloc_chunk) +{ + struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; + struct r600_cs_reloc_legacy *relocs; + int i, j, r; + + uint32_t offset_dw = 0; + + csm = (struct r600_cs_manager_legacy*)cs->csm; + relocs = (struct r600_cs_reloc_legacy *)cs->relocs; +restart: + for (i = 0; i < cs->crelocs; i++) + { + for (j = 0; j < relocs[i].cindices; j++) + { + uint32_t soffset, eoffset, asicoffset; + + r = radeon_bo_legacy_validate(relocs[i].base.bo, + &soffset, &eoffset); + if (r == -EAGAIN) + { + goto restart; + } + if (r) + { + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + return r; + } + asicoffset = cs->packets[relocs[i].indices[j]] + soffset; + if (asicoffset >= eoffset) + { + /* radeon_bo_debug(relocs[i].base.bo, 12); */ + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", + relocs[i].base.bo, + cs->packets[relocs[i].indices[j]], + eoffset); + exit(0); + return -EINVAL; + } + /* apply offset operator */ + switch (relocs[i].offset_mod.shift) + { + case NO_SHIFT: + asicoffset = asicoffset & relocs[i].offset_mod.mask; + break; + case LEFT_SHIFT: + asicoffset = (asicoffset << relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask; + break; + case RIGHT_SHIFT: + asicoffset = (asicoffset >> relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask; + break; + default: + break; + }; + + /* pkt3 nop header in ib chunk */ + cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; + + /* reloc index in ib chunk */ + cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw; + + /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ + reloc_chunk[offset_dw] = asicoffset; + reloc_chunk[offset_dw + 3] = 0; + + offset_dw += 4; + } + } + + *length_dw_reloc_chunk = offset_dw; + + return 0; +} + +static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */ +{ + struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; + struct r600_cs_reloc_legacy *relocs; + int i; + + relocs = (struct r600_cs_reloc_legacy *)cs->relocs; + for (i = 0; i < cs->crelocs; i++) { + radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age); + radeon_bo_unref(relocs[i].base.bo); + } + return 0; +} + +static int r600_cs_emit(struct radeon_cs *cs) +{ + struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm; + struct drm_radeon_cs cs_cmd; + struct drm_radeon_cs_chunk cs_chunk[2]; + drm_radeon_cmd_buffer_t cmd; + /* drm_r300_cmd_header_t age; */ + uint32_t length_dw_reloc_chunk; + uint64_t ull; + uint64_t * chunk_ptrs[2]; + uint32_t reloc_chunk[128]; + int r; + int retry = 0; + + /* TODO : put chip level things here if need. */ + /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */ + + /* TODO : append buffer age */ + + r = r600_cs_process_relocs(cs, &(reloc_chunk[0]), &length_dw_reloc_chunk); + if (r) { + return 0; + } + + /* raw ib chunk */ + cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB; + cs_chunk[0].length_dw = cs->cdw; + cs_chunk[0].chunk_data = (uint64_t)(cs->packets); + + /* reloc chaunk */ + cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + cs_chunk[1].length_dw = length_dw_reloc_chunk; + cs_chunk[1].chunk_data = (uint64_t)&(reloc_chunk[0]); + + chunk_ptrs[0] = (uint64_t * )&(cs_chunk[0]); + chunk_ptrs[1] = (uint64_t * )&(cs_chunk[1]); + + cs_cmd.num_chunks = 2; + cs_cmd.cs_id = 0; + cs_cmd.chunks = (uint64_t)&(chunk_ptrs[0]); + + /* dump_cmdbuf(cs); */ + + do + { + r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd)); + retry++; + } while (r == -EAGAIN && retry < 1000); + + if (r) { + return r; + } + + r600_cs_set_age(cs); + + cs->csm->read_used = 0; + cs->csm->vram_write_used = 0; + cs->csm->gart_write_used = 0; + + return 0; +} + +static void inline r600_cs_free_reloc(void *relocs_p, int crelocs) +{ + struct r600_cs_reloc_legacy *relocs = relocs_p; + int i; + if (!relocs_p) + return; + for (i = 0; i < crelocs; i++) + { + free(relocs[i].indices); + free(relocs[i].reloc_indices); + } +} + +static int r600_cs_destroy(struct radeon_cs *cs) +{ + r600_cs_free_reloc(cs->relocs, cs->crelocs); + free(cs->relocs); + free(cs->packets); + free(cs); + return 0; +} + +static int r600_cs_erase(struct radeon_cs *cs) +{ + r600_cs_free_reloc(cs->relocs, cs->crelocs); + free(cs->relocs); + cs->relocs_total_size = 0; + cs->relocs = NULL; + cs->crelocs = 0; + cs->cdw = 0; + cs->section = 0; + return 0; +} + +static int r600_cs_need_flush(struct radeon_cs *cs) +{ + /* this function used to flush when the BO usage got to + * a certain size, now the higher levels handle this better */ + return 0; +} + +static void r600_cs_print(struct radeon_cs *cs, FILE *file) +{ +} + +static int r600_cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo) +{ + struct radeon_cs_manager *csm = cs->csm; + int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0; + uint32_t read_domains, write_domain; + int i; + struct radeon_bo *bo; + + /* check the totals for this operation */ + + if (num_bo == 0) + return 0; + + /* prepare */ + for (i = 0; i < num_bo; i++) + { + bo = bos[i].bo; + + bos[i].new_accounted = 0; + read_domains = bos[i].read_domains; + write_domain = bos[i].write_domain; + + /* pinned bos don't count */ + if (radeon_legacy_bo_is_static(bo)) + continue; + + /* already accounted this bo */ + if (write_domain && (write_domain == bo->space_accounted)) + continue; + + if (read_domains && ((read_domains << 16) == bo->space_accounted)) + continue; + + if (bo->space_accounted == 0) + { + if (write_domain == RADEON_GEM_DOMAIN_VRAM) + this_op_vram_write += bo->size; + else if (write_domain == RADEON_GEM_DOMAIN_GTT) + this_op_gart_write += bo->size; + else + this_op_read += bo->size; + bos[i].new_accounted = (read_domains << 16) | write_domain; + } + else + { + uint16_t old_read, old_write; + + old_read = bo->space_accounted >> 16; + old_write = bo->space_accounted & 0xffff; + + if (write_domain && (old_read & write_domain)) + { + bos[i].new_accounted = write_domain; + /* moving from read to a write domain */ + if (write_domain == RADEON_GEM_DOMAIN_VRAM) + { + this_op_read -= bo->size; + this_op_vram_write += bo->size; + } + else if (write_domain == RADEON_GEM_DOMAIN_VRAM) + { + this_op_read -= bo->size; + this_op_gart_write += bo->size; + } + } + else if (read_domains & old_write) + { + bos[i].new_accounted = bo->space_accounted & 0xffff; + } + else + { + /* rewrite the domains */ + if (write_domain != old_write) + fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write); + if (read_domains != old_read) + fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read); + return RADEON_CS_SPACE_FLUSH; + } + } + } + + if (this_op_read < 0) + this_op_read = 0; + + /* check sizes - operation first */ + if ((this_op_read + this_op_gart_write > csm->gart_limit) || + (this_op_vram_write > csm->vram_limit)) { + return RADEON_CS_SPACE_OP_TO_BIG; + } + + if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) || + ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) { + return RADEON_CS_SPACE_FLUSH; + } + + csm->gart_write_used += this_op_gart_write; + csm->vram_write_used += this_op_vram_write; + csm->read_used += this_op_read; + /* commit */ + for (i = 0; i < num_bo; i++) { + bo = bos[i].bo; + bo->space_accounted = bos[i].new_accounted; + } + + return RADEON_CS_SPACE_OK; +} + +static struct radeon_cs_funcs r600_cs_funcs = { + r600_cs_create, + r600_cs_write_reloc, + r600_cs_begin, + r600_cs_end, + r600_cs_emit, + r600_cs_destroy, + r600_cs_erase, + r600_cs_need_flush, + r600_cs_print, + r600_cs_check_space +}; + +struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx) +{ + struct r600_cs_manager_legacy *csm; + + csm = (struct r600_cs_manager_legacy*) + calloc(1, sizeof(struct r600_cs_manager_legacy)); + if (csm == NULL) { + return NULL; + } + csm->base.funcs = &r600_cs_funcs; + csm->base.fd = ctx->dri.fd; + csm->ctx = ctx; + csm->pending_age = 1; + return (struct radeon_cs_manager*)csm; +} + +void r600InitCmdBuf(r600ContextPtr r600) /* from rcommonInitCmdBuf */ +{ + radeonContextPtr rmesa = &r600->radeon; + + GLuint size; + /* Initialize command buffer */ + size = 256 * driQueryOptioni(&rmesa->optionCache, + "command_buffer_size"); + if (size < 2 * rmesa->hw.max_state_size) { + size = 2 * rmesa->hw.max_state_size + 65535; + } + if (size > 64 * 256) + size = 64 * 256; + + if (rmesa->radeonScreen->kernel_mm) { + int fd = rmesa->radeonScreen->driScreen->fd; + rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd); + } else { + rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa); + } + if (rmesa->cmdbuf.csm == NULL) { + /* FIXME: fatal error */ + return; + } + rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size); + assert(rmesa->cmdbuf.cs != NULL); + rmesa->cmdbuf.size = size; + + if (!rmesa->radeonScreen->kernel_mm) { + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size); + } else { + struct drm_radeon_gem_info mminfo; + + if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) + { + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible); + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size); + } + } } diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h index 1bab9c5821..dbb723b2b5 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -37,6 +37,173 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define __R600_CMDBUF_H__ #include "r600_context.h" +#include "r600_emit.h" + +#define RADEON_CP_PACKET3_NOP 0xC0001000 +#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900 +#define RADEON_CP_PACKET3_PLY_NEXTSCAN 0xC0001D00 +#define RADEON_CP_PACKET3_SET_SCISSORS 0xC0001E00 +#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM 0xC0002300 +#define RADEON_CP_PACKET3_LOAD_MICROCODE 0xC0002400 +#define RADEON_CP_PACKET3_WAIT_FOR_IDLE 0xC0002600 +#define RADEON_CP_PACKET3_3D_DRAW_VBUF 0xC0002800 +#define RADEON_CP_PACKET3_3D_DRAW_IMMD 0xC0002900 +#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 +#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 +#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 +#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100 +#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200 +#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300 +#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT 0xC0009400 +#define RADEON_CP_PACKET3_CNTL_POLYLINE 0xC0009500 +#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES 0xC0009800 +#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI 0xC0009A00 +#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI 0xC0009B00 +#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT 0xC0009C00 + +/* r6xx/r7xx packet 3 type offsets */ +#define R600_SET_CONFIG_REG_OFFSET 0x00008000 +#define R600_SET_CONFIG_REG_END 0x0000ac00 +#define R600_SET_CONTEXT_REG_OFFSET 0x00028000 +#define R600_SET_CONTEXT_REG_END 0x00029000 +#define R600_SET_ALU_CONST_OFFSET 0x00030000 +#define R600_SET_ALU_CONST_END 0x00032000 +#define R600_SET_RESOURCE_OFFSET 0x00038000 +#define R600_SET_RESOURCE_END 0x0003c000 +#define R600_SET_SAMPLER_OFFSET 0x0003c000 +#define R600_SET_SAMPLER_END 0x0003cff0 +#define R600_SET_CTL_CONST_OFFSET 0x0003cff0 +#define R600_SET_CTL_CONST_END 0x0003e200 +#define R600_SET_LOOP_CONST_OFFSET 0x0003e200 +#define R600_SET_LOOP_CONST_END 0x0003e380 +#define R600_SET_BOOL_CONST_OFFSET 0x0003e380 +#define R600_SET_BOOL_CONST_END 0x00040000 + +/* r6xx/r7xx packet 3 types */ +#define R600_IT_INDIRECT_BUFFER_END 0x00001700 +#define R600_IT_SET_PREDICATION 0x00002000 +#define R600_IT_REG_RMW 0x00002100 +#define R600_IT_COND_EXEC 0x00002200 +#define R600_IT_PRED_EXEC 0x00002300 +#define R600_IT_START_3D_CMDBUF 0x00002400 +#define R600_IT_DRAW_INDEX_2 0x00002700 +#define R600_IT_CONTEXT_CONTROL 0x00002800 +#define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900 +#define R600_IT_INDEX_TYPE 0x00002A00 +#define R600_IT_DRAW_INDEX 0x00002B00 +#define R600_IT_DRAW_INDEX_AUTO 0x00002D00 +#define R600_IT_DRAW_INDEX_IMMD 0x00002E00 +#define R600_IT_NUM_INSTANCES 0x00002F00 +#define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400 +#define R600_IT_INDIRECT_BUFFER_MP 0x00003800 +#define R600_IT_MEM_SEMAPHORE 0x00003900 +#define R600_IT_MPEG_INDEX 0x00003A00 +#define R600_IT_WAIT_REG_MEM 0x00003C00 +#define R600_IT_MEM_WRITE 0x00003D00 +#define R600_IT_INDIRECT_BUFFER 0x00003200 +#define R600_IT_CP_INTERRUPT 0x00004000 +#define R600_IT_SURFACE_SYNC 0x00004300 +#define R600_IT_ME_INITIALIZE 0x00004400 +#define R600_IT_COND_WRITE 0x00004500 +#define R600_IT_EVENT_WRITE 0x00004600 +#define R600_IT_EVENT_WRITE_EOP 0x00004700 +#define R600_IT_ONE_REG_WRITE 0x00005700 +#define R600_IT_SET_CONFIG_REG 0x00006800 +#define R600_IT_SET_CONTEXT_REG 0x00006900 +#define R600_IT_SET_ALU_CONST 0x00006A00 +#define R600_IT_SET_BOOL_CONST 0x00006B00 +#define R600_IT_SET_LOOP_CONST 0x00006C00 +#define R600_IT_SET_RESOURCE 0x00006D00 +#define R600_IT_SET_SAMPLER 0x00006E00 +#define R600_IT_SET_CTL_CONST 0x00006F00 +#define R600_IT_SURFACE_BASE_UPDATE 0x00007300 + +extern int r600_cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags, + offset_modifiers* poffset_mod); + +static inline void r600_cs_write_dword(struct radeon_cs *cs, uint32_t dword) +{ + cs->packets[cs->cdw++] = dword; +} + +/** + * Write one dword to the command buffer. + */ +#define R600_OUT_BATCH(data) \ + do { \ + r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\ + } while(0) + +/** + * Write n dwords from ptr to the command buffer. + */ +#define R600_OUT_BATCH_TABLE(ptr,n) \ + do { \ + int _i; \ + for (_i=0; _i < n; _i++) {\ + r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, ptr[_i]);\ + }\ + } while(0) + +/** + * Write a relocated dword to the command buffer. + */ +#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags, offset_mod) \ + do { \ + if (0 && offset) { \ + fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ + __FILE__, __FUNCTION__, __LINE__, offset); \ + } \ + r600_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ + r600_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ + bo, rd, wd, flags, offset_mod); \ + } while(0) + +/* R600/R700 */ +#define R600_OUT_BATCH_REGS(reg, num) \ +do { \ + if ((reg) >= R600_SET_CONFIG_REG_OFFSET && (reg) < R600_SET_CONFIG_REG_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, (num))); \ + OUT_BATCH(((reg) - R600_SET_CONFIG_REG_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_CONTEXT_REG_OFFSET && (reg) < R600_SET_CONTEXT_REG_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_CONTEXT_REG, (num))); \ + OUT_BATCH(((reg) - R600_SET_CONTEXT_REG_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_ALU_CONST_OFFSET && (reg) < R600_SET_ALU_CONST_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (num))); \ + OUT_BATCH(((reg) - R600_SET_ALU_CONST_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_RESOURCE_OFFSET && (reg) < R600_SET_RESOURCE_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, (num))); \ + OUT_BATCH(((reg) - R600_SET_RESOURCE_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_SAMPLER_OFFSET && (reg) < R600_SET_SAMPLER_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, (num))); \ + OUT_BATCH(((reg) - R600_SET_SAMPLER_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_CTL_CONST_OFFSET && (reg) < R600_SET_CTL_CONST_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, (num))); \ + OUT_BATCH(((reg) - R600_SET_CTL_CONST_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_LOOP_CONST_OFFSET && (reg) < R600_SET_LOOP_CONST_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_LOOP_CONST, (num))); \ + OUT_BATCH(((reg) - R600_SET_LOOP_CONST_OFFSET) >> 2); \ + } else if ((reg) >= R600_SET_BOOL_CONST_OFFSET && (reg) < R600_SET_BOOL_CONST_END) { \ + OUT_BATCH(CP_PACKET3(R600_IT_SET_BOOL_CONST, (num))); \ + OUT_BATCH(((reg) - R600_SET_BOOL_CONST_OFFSET) >> 2); \ + } else { \ + OUT_BATCH(CP_PACKET0((reg), (num))); \ + } \ +} while (0) + +/** Single register write to command buffer; requires 3 dwords for most things. */ +#define R600_OUT_BATCH_REGVAL(reg, val) \ + R600_OUT_BATCH_REGS((reg), 1); \ + OUT_BATCH((val)) + +/** Continuous register range write to command buffer; requires 1 dword, + * expects count dwords afterwards for register contents. */ +#define R600_OUT_BATCH_REGSEQ(reg, count) \ + R600_OUT_BATCH_REGS((reg), (count)) extern void r600InitCmdBuf(r600ContextPtr r600); diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 1b43f68ac2..3c8ba003d4 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -156,42 +156,6 @@ const struct dri_extension gl_20_extension[] = { }; -extern struct tnl_pipeline_stage _r600_render_stage; -extern const struct tnl_pipeline_stage _r600_tcl_stage; - -static const struct tnl_pipeline_stage *r600_pipeline[] = { - - /* Try and go straight to t&l - */ - &_r600_tcl_stage, - - /* Catch any t&l fallbacks - */ - &_tnl_vertex_transform_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_vertex_program_stage, - - /* Try again to go to tcl? - * - no good for asymmetric-twoside (do with multipass) - * - no good for asymmetric-unfilled (do with multipass) - * - good for material - * - good for texgen - * - need to manipulate a bit of state - * - * - worth it/not worth it? - */ - - /* Else do them here. - */ - &_r600_render_stage, - &_tnl_render_stage, /* FALLBACK */ - 0, -}; - static void r600RunPipeline(GLcontext * ctx) { _mesa_lock_context_textures(ctx); @@ -244,31 +208,6 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = r600_fallback; } -/* to be enabled */ -static void r600EmitShader(GLcontext * ctx, - struct r600_dma_region *rvb, - GLvoid * data, - int sizeinDWORD) -{ -} -/* to be enabled */ -static void r600FreeDmaRegion(context_t *context, - struct r600_dma_region *region) -{ -} -/* to be enabled */ -static void r600EmitVec(GLcontext * ctx, - struct r600_dma_region *rvb, - GLvoid * data, - int size, - int stride, - int count) -{ -} -/* to be enabled */ -static void r600ReleaseArrays(GLcontext * ctx) -{ -} /* to be enabled */ static GLboolean r600LoadMemSurf(context_t *context, GLuint dst_offset, /* gpu addr */ @@ -287,6 +226,7 @@ static GLboolean r600AllocMemSurf(context_t *context, GLuint *prefered_heap, /* Now used RADEON_LOCAL_TEX_HEAP, return actual heap used. */ GLuint totalSize) { + return GL_TRUE; } /* to be enabled */ static int r600FlushCmdBuffer(context_t *context) @@ -332,6 +272,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, */ driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, screen->driScreen->myNum, "r600"); + r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, "def_max_anisotropy"); @@ -344,9 +285,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, (r600->chipobj.InitFuncs)(&functions); r600->chipobj.EmitShader = r600EmitShader; + r600->chipobj.DeleteShader = r600DeleteShader; r600->chipobj.FreeDmaRegion = r600FreeDmaRegion; r600->chipobj.EmitVec = r600EmitVec; - r600->chipobj.ReleaseArrays = r600ReleaseArrays; + r600->chipobj.ReleaseArrays = r600ReleaseVec; r600->chipobj.LoadMemSurf = r600LoadMemSurf; r600->chipobj.AllocMemSurf = r600AllocMemSurf; r600->chipobj.FlushCmdBuffer = r600FlushCmdBuffer; @@ -476,8 +418,9 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, radeonInitSpanFuncs( ctx ); r600InitCmdBuf(r600); + + (r600->chipobj.InitState)(r600->radeon.glCtx); #if 0 /* to be enabled */ - r600InitState(r600); if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) r600InitSwtcl(ctx); #endif diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 74e34e3684..70108b74c4 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -140,26 +140,19 @@ struct r600_swtcl_info { GLubyte vertex_attr_count; }; -/* to be enabled */ /* DELETE FOLLOWING 2 SRTUCTS */ -struct r600_dma_buffer +enum { - int refcount; /* the number of retained regions in buf */ - drmBufPtr buf; - int id; + NO_SHIFT = 0, + LEFT_SHIFT = 1, + RIGHT_SHIFT = 2, }; -struct r600_dma_region + +typedef struct offset_modifiers { - struct r600_dma_buffer *buf; - char *address; /* == buf->address */ - int start; - int end; - int ptr; /* offsets from start of buf */ - - int aos_offset; /* address in GART memory */ - int aos_stride; /* distance between elements, in dwords */ - int aos_size; /* number of components (1-4) */ -}; -/* ----------------------- */ + GLuint shift; + GLuint shiftbits; + GLuint mask; +} offset_modifiers; typedef struct chip_object { @@ -175,14 +168,16 @@ typedef struct chip_object GLuint (*GetTexObjSize)(void); /* ------------ IN ------------------- */ - void (*EmitShader)( GLcontext * ctx, - struct r600_dma_region *rvb, + GLboolean (*EmitShader)( GLcontext * ctx, + void ** shaderbo, GLvoid * data, int sizeinDWORD); + GLboolean (*DeleteShader)(GLcontext * ctx, + void * shaderbo); void (*FreeDmaRegion)( GLcontext * ctx, - struct r600_dma_region *region); - void (*EmitVec)(GLcontext * ctx, - struct r600_dma_region *rvb, + void * shaderbo); + GLboolean (*EmitVec)(GLcontext * ctx, + struct radeon_aos *aos, GLvoid * data, int size, int stride, diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c index 9d43873c4d..fc8f987cab 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.c +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -1,9 +1,8 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. +/************************************************************************** + +Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD) -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. +Copyright (C) Advanced Micro Devices Inc. (AMD) 2009. All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -27,10 +26,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -/** - * \file - * - * \author Keith Whitwell +/* + * Authors: + * Richard Li , + * CooperYuan , */ #include "main/glheader.h" @@ -48,6 +47,105 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r600_context.h" #include "r600_emit.h" +#if defined(USE_X86_ASM) +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int __tmp; \ + __asm__ __volatile__( "rep ; movsl" \ + : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ + : "0" (nr), \ + "D" ((long)dst), \ + "S" ((long)src) ); \ +} while (0) +#else +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int j; \ + for ( j = 0 ; j < nr ; j++ ) \ + dst[j] = ((int *)src)[j]; \ + dst += nr; \ +} while (0) +#endif + +static void r600EmitVec4(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 4) + COPY_DWORDS(out, data, count); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out++; + data += stride; + } +} + +static void r600EmitVec8(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 8) + COPY_DWORDS(out, data, count * 2); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out += 2; + data += stride; + } +} + +static void r600EmitVec12(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 12) { + COPY_DWORDS(out, data, count * 3); + } + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out += 3; + data += stride; + } +} + +static void r600EmitVec16(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 16) + COPY_DWORDS(out, data, count * 4); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out[3] = *(int *)(data + 12); + out += 4; + data += stride; + } +} + /* Emit vertex data to GART memory * Route inputs to the vertex processor * This function should never return R600_FALLBACK_TCL when using software tcl. @@ -73,3 +171,111 @@ void r600EmitCacheFlush(r600ContextPtr rmesa) COMMIT_BATCH(); */ } + +GLboolean r600EmitShader(GLcontext * ctx, + void ** shaderbo, + GLvoid * data, + int sizeinDWORD) +{ + radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); + + struct radeon_bo * pbo; + uint32_t *out; + +shader_again_alloc: + pbo = radeon_bo_open(radeonctx->radeonScreen->bom, + 0, + sizeinDWORD * 4, + 256, + RADEON_GEM_DOMAIN_GTT, + 0); + + if (!pbo) + { + rcommonFlushCmdBuf(radeonctx, __FUNCTION__); + goto shader_again_alloc; + } + + radeon_validate_bo(radeonctx, pbo, RADEON_GEM_DOMAIN_GTT, 0); + + if (radeon_revalidate_bos(radeonctx->glCtx) == GL_FALSE) + { + fprintf(stderr,"failure to revalidate BOs - badness\n"); + } + + radeon_bo_map(pbo, 1); + + radeon_bo_ref(pbo); + + out = (uint32_t*)(pbo->ptr); + + memcpy(out, data, sizeinDWORD * 4); + + *shaderbo = (void*)pbo; + + return GL_TRUE; +} + +GLboolean r600DeleteShader(GLcontext * ctx, + void * shaderbo) +{ + struct radeon_bo * pbo = (struct radeon_bo *)shaderbo; + + radeon_bo_unmap(pbo); + radeon_bo_unref(pbo); /* when bo->cref <= 0, bo will be bo_free */ + + return GL_TRUE; +} + +GLboolean r600EmitVec(GLcontext * ctx, + struct radeon_aos *aos, + GLvoid * data, + int size, + int stride, + int count) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + uint32_t *out; + + if (stride == 0) + { + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); + aos->stride = 0; + } + else + { + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); + aos->stride = size; + } + + aos->components = size; + aos->count = count; + + out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + switch (size) { + case 1: r600EmitVec4(out, data, stride, count); break; + case 2: r600EmitVec8(out, data, stride, count); break; + case 3: r600EmitVec12(out, data, stride, count); break; + case 4: r600EmitVec16(out, data, stride, count); break; + default: + assert(0); + break; + } + + return GL_TRUE; +} + +void r600ReleaseVec(GLcontext * ctx) +{ + radeonReleaseArrays(ctx, ~0); +} + +void r600FreeDmaRegion(context_t *context, + void * shaderbo) +{ + struct radeon_bo *pbo = (struct radeon_bo *)shaderbo; + if(pbo) + { + radeon_bo_unref(pbo); + } +} diff --git a/src/mesa/drivers/dri/r600/r600_emit.h b/src/mesa/drivers/dri/r600/r600_emit.h index 3e1cf1ff5f..11765c868c 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.h +++ b/src/mesa/drivers/dri/r600/r600_emit.h @@ -1,41 +1,38 @@ -/* - * Copyright (C) 2005 Vladimir Dergachev. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ +/************************************************************************** + +Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD) + +Copyright (C) Advanced Micro Devices Inc. (AMD) 2009. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ /* * Authors: - * Vladimir Dergachev - * Nicolai Haehnle - * Aapo Tahkola - * Ben Skeggs - * Jerome Glisse + * Richard Li , + * CooperYuan , */ -/* This files defines functions for accessing R600 hardware. - */ + #ifndef __R600_EMIT_H__ #define __R600_EMIT_H__ @@ -44,180 +41,24 @@ #include "r600_cmdbuf.h" #include "radeon_reg.h" -static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn, - int reg, int count) -{ - if (count) { - return CP_PACKET0(reg, count - 1); - } - return CP_PACKET2; -} - -static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count) -{ - drm_r300_cmd_header_t cmd; - - cmd.u = 0; - cmd.vpu.cmd_type = R300_CMD_VPU; - cmd.vpu.count = count; - cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; - cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); - - return cmd.u; -} - -static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn, - int addr, int count, int type, int clamp) -{ - drm_r300_cmd_header_t cmd; - - cmd.u = 0; - cmd.r500fp.cmd_type = R300_CMD_R500FP; - cmd.r500fp.count = count; - cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; - cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; - cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; - cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); - - return cmd.u; -} - -static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) -{ - drm_r300_cmd_header_t cmd; - - cmd.u = 0; - cmd.packet3.cmd_type = R300_CMD_PACKET3; - cmd.packet3.packet = packet; - - return cmd.u; -} - -static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, - unsigned short count) -{ - drm_r300_cmd_header_t cmd; - - cmd.u = 0; - - cmd.delay.cmd_type = R300_CMD_CP_DELAY; - cmd.delay.count = count; - - return cmd.u; -} - -static INLINE uint32_t cmdwait(struct radeon_screen *rscrn, - unsigned char flags) -{ - drm_r300_cmd_header_t cmd; - - cmd.u = 0; - cmd.wait.cmd_type = R300_CMD_WAIT; - cmd.wait.flags = flags; - - return cmd.u; -} - -static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn) -{ - drm_r300_cmd_header_t cmd; - - cmd.u = 0; - cmd.header.cmd_type = R300_CMD_END3D; - - return cmd.u; -} - -/** - * Write the header of a packet3 to the command buffer. - * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. - */ -#define OUT_BATCH_PACKET3(packet, num_extra) do {\ - if (!b_l_rmesa->radeonScreen->kernel_mm) { \ - OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\ - R300_CMD_PACKET3_RAW)); \ - } else b_l_rmesa->cmdbuf.cs->section_cdw++;\ - OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ - } while(0) - -/** - * Must be sent to switch to 2d commands - */ -void static INLINE end_3d(radeonContextPtr radeon) -{ - BATCH_LOCALS(radeon); - - if (!radeon->radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(1); - OUT_BATCH(cmdpacify(radeon->radeonScreen)); - END_BATCH(); - } -} - -void static INLINE cp_delay(r600ContextPtr rmesa, unsigned short count) -{ - BATCH_LOCALS(&rmesa->radeon); - - if (!rmesa->radeon.radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(1); - OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count)); - END_BATCH(); - } -} - -void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) -{ - BATCH_LOCALS(radeon); - uint32_t wait_until; - - if (!radeon->radeonScreen->kernel_mm) { - BEGIN_BATCH_NO_AUTOSTATE(1); - OUT_BATCH(cmdwait(radeon->radeonScreen, flags)); - END_BATCH(); - } else { - switch(flags) { - case R300_WAIT_2D: - wait_until = (1 << 14); - break; - case R300_WAIT_3D: - wait_until = (1 << 15); - break; - case R300_NEW_WAIT_2D_3D: - wait_until = (1 << 14) | (1 << 15); - break; - case R300_NEW_WAIT_2D_2D_CLEAN: - wait_until = (1 << 14) | (1 << 16) | (1 << 18); - break; - case R300_NEW_WAIT_3D_3D_CLEAN: - wait_until = (1 << 15) | (1 << 17) | (1 << 18); - break; - case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: - wait_until = (1 << 14) | (1 << 16) | (1 << 18); - wait_until |= (1 << 15) | (1 << 17) | (1 << 18); - break; - default: - return; - } - BEGIN_BATCH_NO_AUTOSTATE(2); - OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); - OUT_BATCH(wait_until); - END_BATCH(); - } -} - -extern int r600EmitArrays(GLcontext * ctx); - -extern int r600PrimitiveType(r600ContextPtr rmesa, int prim); -extern int r600NumVerts(r600ContextPtr rmesa, int num_verts, int prim); - -extern void r600EmitCacheFlush(r600ContextPtr rmesa); - -extern GLuint r600VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, - int *inputs, GLint * tab, GLuint nr); -extern GLuint r600VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); -extern GLuint r600VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); -extern GLuint r600VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); -extern GLuint r600VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); -extern GLuint r600VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); +extern GLboolean r600EmitShader(GLcontext * ctx, + void ** shaderbo, + GLvoid * data, + int sizeinDWORD); + +extern GLboolean r600DeleteShader(GLcontext * ctx, + void * shaderbo); + +extern GLboolean r600EmitVec(GLcontext * ctx, + struct radeon_aos *aos, + GLvoid * data, + int size, + int stride, + int count); + +extern void r600ReleaseVec(GLcontext * ctx); + +extern void r600FreeDmaRegion(context_t *context, + void * shaderbo); #endif diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index d0371cdda0..e534bb11b2 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -29,15 +29,15 @@ #include "main/glheader.h" #include "r600_context.h" +#include "r600_cmdbuf.h" #include "r700_chip.h" #include "r700_state.h" #include "r700_tex.h" #include "r700_oglprog.h" +#include "r700_fragprog.h" +#include "r700_vertprog.h" #include "r700_ioctl.h" -/* to be enable -#include "r700_emit.h" -*/ extern const struct tnl_pipeline_stage *r700_pipeline[]; @@ -370,15 +370,235 @@ GLboolean r700InitChipObject(context_t *context) return GL_TRUE; } -GLboolean r700SendContextStates(context_t *context) +void r700SetupVTXConstans(GLcontext * ctx, + unsigned int nStreamID, + void * pAos, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int count) /* number of vectors in stream */ { -#if 0 //to be enable + context_t *context = R700_CONTEXT(ctx); + uint32_t *dest; + struct radeon_aos * paos = (struct radeon_aos *)pAos; + offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF}; + + BATCH_LOCALS(&context->radeon); + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; + uSQ_VTX_CONSTANT_WORD1_0 = count * stride - 1; + + uSQ_VTX_CONSTANT_WORD2_0 |= 0 << BASE_ADDRESS_HI_shift /* TODO */ + |stride << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift + |GetSurfaceFormat(GL_FLOAT, size, NULL) << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift /* TODO : trace back api for initial data type, not only GL_FLOAT */ + |SQ_NUM_FORMAT_SCALED << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift + |SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; + + uSQ_VTX_CONSTANT_WORD3_0 |= 1 << MEM_REQUEST_SIZE_shift; + + uSQ_VTX_CONSTANT_WORD6_0 |= SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift; + + BEGIN_BATCH_NO_AUTOSTATE(9); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD1_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD1_0, + RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); + OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); + OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); + + END_BATCH(); + COMMIT_BATCH(); + +} + +int r700SetupStreams(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + BATCH_LOCALS(&context->radeon); + + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + unsigned int unBit; + unsigned int i; + + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + OUT_BATCH(0); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); + OUT_BATCH(0); + END_BATCH(); + COMMIT_BATCH(); + + //context->aos_count = 0; + for(i=0; imesa_program.Base.InputsRead & unBit) + { + (context->chipobj.EmitVec)(ctx, + &context->radeon.tcl.aos[i], + vb->AttribPtr[i]->data, + vb->AttribPtr[i]->size, + vb->AttribPtr[i]->stride, + vb->Count); + + /* currently aos are packed */ + r700SetupVTXConstans(ctx, + i, + (void*)(&context->radeon.tcl.aos[i]), + (unsigned int)vb->AttribPtr[i]->size, + (unsigned int)(vb->AttribPtr[i]->size * 4), + (unsigned int)vb->Count); + } + } + + return R600_FALLBACK_NONE; +} + +inline GLboolean setRelocReg(context_t *context, unsigned int reg, + void * pbo_vs, void * pbo_fs) +{ + BATCH_LOCALS(&context->radeon); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + + struct radeon_bo * pbo; + uint32_t voffset; + offset_modifiers offset_mod; + + switch (reg + ASIC_CONTEXT_BASE_INDEX) + { + case mmCB_COLOR0_BASE: + case mmCB_COLOR1_BASE: + case mmCB_COLOR2_BASE: + case mmCB_COLOR3_BASE: + case mmCB_COLOR4_BASE: + case mmCB_COLOR5_BASE: + case mmCB_COLOR6_BASE: + case mmCB_COLOR7_BASE: + { + GLcontext *ctx = GL_CONTEXT(context); + struct radeon_renderbuffer *rrb; + + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) + { + fprintf(stderr, "no rrb\n"); + return GL_FALSE; + } + + offset_mod.shift = RIGHT_SHIFT; + offset_mod.shiftbits = 8; + offset_mod.mask = 0x00FFFFFF; + + R600_OUT_BATCH_RELOC(r700->CB_COLOR0_BASE.u32All, + rrb->bo, + r700->CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0, &offset_mod); + return GL_TRUE; + } + break; + case mmDB_DEPTH_BASE: + { + GLcontext *ctx = GL_CONTEXT(context); + struct radeon_renderbuffer *rrb; + rrb = radeon_get_depthbuffer(&context->radeon); + + offset_mod.shift = RIGHT_SHIFT; + offset_mod.shiftbits = 8; + offset_mod.mask = 0x00FFFFFF; + + R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, + rrb->bo, + r700->DB_DEPTH_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0, &offset_mod); + + return GL_TRUE; + } + break; + case mmSQ_PGM_START_VS: + { + if(NULL != pbo_vs) + { + pbo = (struct radeon_bo *)pbo_vs; + } + else + { + pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); + } + + offset_mod.shift = RIGHT_SHIFT; + offset_mod.shiftbits = 8; + offset_mod.mask = 0x00FFFFFF; + + R600_OUT_BATCH_RELOC(r700->SQ_PGM_START_VS.u32All, + pbo, + r700->SQ_PGM_START_VS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + return GL_TRUE; + } + break; + case mmSQ_PGM_START_FS: + case mmSQ_PGM_START_ES: + case mmSQ_PGM_START_GS: + case mmSQ_PGM_START_PS: + { + if(NULL != pbo_fs) + { + pbo = (struct radeon_bo *)pbo_fs; + } + else + { + pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); + } + + offset_mod.shift = RIGHT_SHIFT; + offset_mod.shiftbits = 8; + offset_mod.mask = 0x00FFFFFF; + + voffset = 0; + R600_OUT_BATCH_RELOC(r700->SQ_PGM_START_PS.u32All, + pbo, + r700->SQ_PGM_START_PS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0, &offset_mod); + return GL_TRUE; + } + break; + } + + return GL_FALSE; +} + +GLboolean r700SendContextStates(context_t *context, void * pbo_vs, void * pbo_fs) +{ + BATCH_LOCALS(&context->radeon); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); ContextState * pState = r700->pStateList; ContextState * pInit; unsigned int toSend; - unsigned int ui; + unsigned int ui; while(NULL != pState) { @@ -401,17 +621,21 @@ GLboolean r700SendContextStates(context_t *context) pState = pState->pNext; - R700_CMDBUF_CHECK_SPACE(toSend + 2); - R700EP3(context, IT_SET_CONTEXT_REG, toSend); - R700E32(context, pInit->unOffset); - + BEGIN_BATCH_NO_AUTOSTATE(toSend + 2); + R600_OUT_BATCH_REGSEQ(((pInit->unOffset + ASIC_CONTEXT_BASE_INDEX)<<2), toSend); for(ui=0; uipuiValue)); + if( GL_FALSE == setRelocReg(context, (pInit->unOffset+ui), pbo_vs, pbo_fs) ) + { + /* for not reloc reg. */ + OUT_BATCH(*(pInit->puiValue)); + } pInit = pInit->pNext; }; + END_BATCH(); }; -#endif //to be enable + COMMIT_BATCH(); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h index f08190027c..30ca2cab84 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.h +++ b/src/mesa/drivers/dri/r600/r700_chip.h @@ -446,13 +446,22 @@ typedef struct _R700_CHIP_CONTEXT ContextState * pStateList; R700_TEXTURE_STATES texture_states; + + GLboolean bEnablePerspective; } R700_CHIP_CONTEXT; #define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(context->chipobj.pvChipObj)) extern GLboolean r700InitChipObject(context_t *context); -extern GLboolean r700SendContextStates(context_t *context); +extern GLboolean r700SendContextStates(context_t *context, void * pbo_vs, void * pbo_fs); +extern int r700SetupStreams(GLcontext * ctx); +extern void r700SetupVTXConstans(GLcontext * ctx, + unsigned int nStreamID, + void * pAos, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int Count); /* number of vectors in stream */ #endif /* _R700_CHIP_H_ */ diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 5725360dac..743875dfe2 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -35,23 +35,12 @@ #include "r600_context.h" #include "r700_chip.h" -#if 0 /* to be enabled */ #include "r700_shaderinst.h" -#include "r700_emit.h" -#endif /* to be enabled */ +#include "r600_emit.h" extern void r700InitState (GLcontext * ctx); -#if 0 /* to be enabled */ -extern void r700SetupVTXConstans(GLcontext * ctx, - unsigned int nStreamID, - unsigned int aos_offset, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int count); -extern GLboolean r700SendContextStates(context_t *context); extern GLboolean r700SyncSurf(context_t *context); -#endif /* to be enabled */ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) { @@ -63,17 +52,15 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) { GLcontext *ctx = GL_CONTEXT(context); -#if 0 /* to be enabled */ + BATCH_LOCALS(&context->radeon); + R700_CHIP_CONTEXT r700Saved; R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); - struct r600_dma_region aos_vs; - struct r600_dma_region aos_fs; - struct r600_dma_region aos_vb; - aos_vs.buf = NULL; - aos_fs.buf = NULL; - aos_vb.buf = NULL; - + void * pbo_vs; + void * pbo_fs; + struct radeon_aos aos_vb; + unsigned int ui; GLfloat fTemp; GLfloat fVb[] = { 1.0f, 1.0f, 1.0f, 1.0f, @@ -93,7 +80,7 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) 0x10000000, 0x340C90, 0x10000400, 0x20340C90, 0x10000800, 0x40340C90, 0x90000C00, 0x60200C90}; - if (context->screen->chip.type <= CHIP_TYPE_RV670) + if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670) { uVs[9] = 0x541910; uVs[11] = 0x20541910; @@ -116,7 +103,7 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) r700InitState(ctx); - r700->CB_COLOR0_BASE.u32All = context->target.rt.gpu >> 8; + r700SetRenderTarget(context); /* Turn off perspective divid. */ SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); @@ -152,15 +139,19 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) } /* Setup vb */ - R700_CMDBUF_CHECK_SPACE(6); - R700EP3 (context, IT_SET_CTL_CONST, 1); - R700E32 (context, mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); - R700E32 (context, 0); - R700EP3 (context, IT_SET_CTL_CONST, 1); - R700E32 (context, mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); - R700E32 (context, 0); + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + OUT_BATCH(0); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); + OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); + OUT_BATCH(0); + END_BATCH(); + COMMIT_BATCH(); + (context->chipobj.EmitVec)(ctx, &aos_vb, (GLvoid *)fVb, 4, 16, 6); - r700SetupVTXConstans(ctx, VERT_ATTRIB_POS, (unsigned int)aos_vb.aos_offset, 4, 16, 6); + r700SetupVTXConstans(ctx, VERT_ATTRIB_POS, &aos_vb, 4, 16, 6); /* Setup shaders, copied from dump */ r700->SQ_PGM_RESOURCES_PS.u32All = 0; @@ -168,23 +159,26 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit); /* vs */ - (context->chipobj.EmitShader)(ctx, &aos_vs, (GLvoid *)(&uVs[0]), 28); - r700->SQ_PGM_START_VS.u32All = (aos_vs.aos_offset >> 8) & 0x00FFFFFF; + (context->chipobj.EmitShader)(ctx, &pbo_vs, (GLvoid *)(&uVs[0]), 28); + r700->SQ_PGM_START_VS.u32All = 0; r700->SQ_PGM_RESOURCES_VS.u32All = 0x00800004; + /* vs const */ /* TODO : Set color here */ - R700_CMDBUF_CHECK_SPACE(4 + 2); - R700EP3 (context, IT_SET_ALU_CONST, 4); - R700E32 (context, SQ_ALU_CONSTANT_VS_OFFSET * 4); - R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[0]))); - R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[1]))); - R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[2]))); - R700E32 (context, *((unsigned int*)&(ctx->Color.ClearColor[3]))); + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, 4)); + OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4); + OUT_BATCH(*((unsigned int*)&(ctx->Color.ClearColor[0]))); + OUT_BATCH(*((unsigned int*)&(ctx->Color.ClearColor[1]))); + OUT_BATCH(*((unsigned int*)&(ctx->Color.ClearColor[2]))); + OUT_BATCH(*((unsigned int*)&(ctx->Color.ClearColor[3]))); + END_BATCH(); + COMMIT_BATCH(); r700->SPI_VS_OUT_CONFIG.u32All = 0x00000000; r700->SPI_PS_IN_CONTROL_0.u32All = 0x20000001; /* ps */ - (context->chipobj.EmitShader)(ctx, &aos_fs, (GLvoid *)(&uFs[0]), 12); - r700->SQ_PGM_START_PS.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; + (context->chipobj.EmitShader)(ctx, &pbo_fs, (GLvoid *)(&uFs[0]), 12); + r700->SQ_PGM_START_PS.u32All = 0; r700->SQ_PGM_RESOURCES_PS.u32All = 0x00800002; r700->SQ_PGM_EXPORTS_PS.u32All = 0x00000002; r700->DB_SHADER_CONTROL.u32All = 0x00000200; @@ -192,12 +186,12 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) r700->CB_SHADER_CONTROL.u32All = 0x00000001; /* set a valid base address to make the command checker happy */ - r700->SQ_PGM_START_FS.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; - r700->SQ_PGM_START_ES.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; - r700->SQ_PGM_START_GS.u32All = (aos_fs.aos_offset >> 8) & 0x00FFFFFF; + r700->SQ_PGM_START_FS.u32All = 0; + r700->SQ_PGM_START_ES.u32All = 0; + r700->SQ_PGM_START_GS.u32All = 0; /* Now, send the states */ - r700SendContextStates(context); + r700SendContextStates(context, pbo_vs, pbo_fs); /* Draw */ GLuint numEntires, j; @@ -211,44 +205,47 @@ static GLboolean r700ClearWithDraw(context_t *context, GLbitfield mask) + 3 /* VGT_PRIMITIVE_TYPE */ + numIndices + 3; /* DRAW_INDEX_IMMD */ - R700_CMDBUF_CHECK_SPACE(numEntires); + BEGIN_BATCH_NO_AUTOSTATE(numEntires); SETfield(VGT_INDEX_TYPE, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - R700EP3(context, IT_INDEX_TYPE, 0); - R700E32(context, VGT_INDEX_TYPE); + OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + OUT_BATCH(VGT_INDEX_TYPE); VGT_NUM_INDICES = numIndices; SETfield(VGT_PRIMITIVE_TYPE, DI_PT_TRILIST, VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - R700EP3(context, IT_SET_CONFIG_REG, 1); - R700E32(context, mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R700E32(context, VGT_PRIMITIVE_TYPE); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + OUT_BATCH(VGT_PRIMITIVE_TYPE); SETfield(VGT_DRAW_INITIATOR, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); SETfield(VGT_DRAW_INITIATOR, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - R700EP3(context, IT_DRAW_INDEX_IMMD, (numIndices + 1)); - R700E32(context, VGT_NUM_INDICES); - R700E32(context, VGT_DRAW_INITIATOR); + OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1))); + OUT_BATCH(VGT_NUM_INDICES); + OUT_BATCH(VGT_DRAW_INITIATOR); for (j=0; jchipobj.FlushCmdBuffer)(context); /* TODO : keep these in context, don't load and release every time. */ - (context->chipobj.FreeDmaRegion)(context, &aos_vs); - (context->chipobj.FreeDmaRegion)(context, &aos_fs); - (context->chipobj.FreeDmaRegion)(context, &aos_vb); + (context->chipobj.DeleteShader)(context, &pbo_vs); + + (context->chipobj.DeleteShader)(context, &pbo_fs); + + (context->chipobj.FreeDmaRegion)(context, aos_vb.bo); /* Restore chip object. */ memcpy(r700, &r700Saved, sizeof(R700_CHIP_CONTEXT)); -#endif /* to be enabled */ - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 2b813828a5..d06af109e5 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -36,15 +36,11 @@ #include "shader/prog_statevars.h" #include "r600_context.h" +#include "r600_cmdbuf.h" #include "r700_chip.h" #include "r700_fragprog.h" -/* to be enabled */ -#if 0 -#include "r700_emit.h" -#endif - #include "r700_debug.h" //TODO : Validate FP input with VP output. @@ -250,9 +246,18 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_TRUE; } +void * r700GetActiveFpShaderBo(GLcontext * ctx) +{ + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + + return fp->shaderbo; +} + GLboolean r700SetupFragmentProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); @@ -271,14 +276,13 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) { Assemble( &(fp->r700Shader) ); } -/* to be enabled */ -#if 0 + /* Load fp to gpu */ (context->chipobj.EmitShader)(ctx, - &(fp->shadercode), + &(fp->shaderbo), (GLvoid *)(fp->r700Shader.pProgram), fp->r700Shader.uShaderBinaryDWORDSize); -#endif + fp->loaded = GL_TRUE; } @@ -288,10 +292,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) /* TODO : enable this after MemUse fixed *= (context->chipobj.MemUse)(context, fp->shadercode.buf->id); */ -/* to be enabled */ -#if 0 - r700->SQ_PGM_START_PS.u32All = (fp->shadercode.aos_offset >> 8) & 0x00FFFFFF; -#endif + + r700->SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */ unNumOfReg = fp->r700Shader.nRegs + 1; @@ -337,25 +339,27 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) if(NULL != paramList) { _mesa_load_state_parameters(ctx, paramList); -/* to be enabled */ -#if 0 + unNumParamData = paramList->NumParameters * 4; - R700_CMDBUF_CHECK_SPACE(2 + unNumParamData); - R700EP3(context, IT_SET_ALU_CONST, unNumParamData); + BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); + /* assembler map const from very beginning. */ - R700E32(context, SQ_ALU_CONSTANT_PS_OFFSET * 4); + OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4); unNumParamData = paramList->NumParameters; for(ui=0; uiParameterValues[ui][0]))); - R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][1]))); - R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][2]))); - R700E32(context, *((unsigned int*)&(paramList->ParameterValues[ui][3]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3]))); } -#endif + END_BATCH(); + COMMIT_BATCH(); } return GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index 649ee4822d..9c7813e908 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -40,10 +40,8 @@ struct r700_fragment_program GLboolean translated; GLboolean loaded; GLboolean error; -/* to be enabled */ -#if 0 - struct r600_dma_region shadercode; -#endif + + void * shaderbo; GLboolean WritesDepth; GLuint optimization; @@ -60,4 +58,6 @@ extern GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, struct gl_fragment_program *mesa_vp); extern GLboolean r700SetupFragmentProgram(GLcontext * ctx); +extern void * r700GetActiveFpShaderBo(GLcontext * ctx); + #endif /*_R700_FRAGPROG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c index 0541dc76e4..7493efab0f 100644 --- a/src/mesa/drivers/dri/r600/r700_ioctl.c +++ b/src/mesa/drivers/dri/r600/r700_ioctl.c @@ -37,12 +37,38 @@ #include "radeon_lock.h" #include "r600_context.h" +#include "r700_chip.h" #include "r700_ioctl.h" #include "r700_clear.h" +static void r700Flush(GLcontext *ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + context_t * context = R700_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); + + /* okay if we have no cmds in the buffer && + we have no DMA flush && + we have no DMA buffer allocated. + then no point flushing anything at all. + */ + if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current) + return; + + if (radeon->dma.flush) + radeon->dma.flush( ctx ); + + r700SendContextStates(context, NULL, NULL); + + if (radeon->cmdbuf.cs->cdw) + rcommonFlushCmdBuf(radeon, __FUNCTION__); +} + void r700InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r700Clear; functions->Finish = radeonFinish; - functions->Flush = radeonFlush; + functions->Flush = r700Flush; } diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index ee6377632b..dc8c936165 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -36,18 +36,16 @@ #include "r700_chip.h" #include "r700_oglprog.h" - -#if 0 /* to be enabled */ #include "r700_fragprog.h" #include "r700_vertprog.h" -#endif /* to be enabled */ + static struct gl_program *r700NewProgram(GLcontext * ctx, GLenum target, GLuint id) { struct gl_program *pProgram = NULL; -#if 0 /* to be enabled */ + struct r700_vertex_program *vp; struct r700_fragment_program *fp; @@ -62,7 +60,9 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, id); vp->translated = GL_FALSE; vp->loaded = GL_FALSE; - vp->shadercode.buf = NULL; + + vp->shaderbo = NULL; + break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -73,18 +73,19 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, id); fp->translated = GL_FALSE; fp->loaded = GL_FALSE; - fp->shadercode.buf = NULL; + + fp->shaderbo = NULL; + break; default: _mesa_problem(ctx, "Bad target in r700NewProgram"); } -#endif /* to be enabled */ + return pProgram; } static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { -#if 0 /* to be enabled */ struct r700_vertex_program * vp; struct r700_fragment_program * fp; context_t *context = R700_CONTEXT(ctx); @@ -95,7 +96,9 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) case GL_VERTEX_PROGRAM_ARB: vp = (struct r700_vertex_program*)prog; /* Release DMA region */ - (context->chipobj.FreeDmaRegion)(context, &(vp->shadercode)); + + (context->chipobj.DeleteShader)(ctx, vp->shaderbo); + /* Clean up */ Clean_Up_Assembler(&(vp->r700AsmCode)); Clean_Up_Shader(&(vp->r700Shader)); @@ -104,7 +107,9 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) case GL_FRAGMENT_PROGRAM_ARB: fp = (struct r700_fragment_program*)prog; /* Release DMA region */ - (context->chipobj.FreeDmaRegion)(context, &(fp->shadercode)); + + (context->chipobj.DeleteShader)(ctx, fp->shaderbo); + /* Clean up */ Clean_Up_Assembler(&(fp->r700AsmCode)); Clean_Up_Shader(&(fp->r700Shader)); @@ -114,7 +119,6 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) } _mesa_delete_program(ctx, prog); -#endif /* to be enabled */ } static void diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index fce99ada54..f338cf823c 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -45,107 +45,64 @@ #include "tnl/t_pipeline.h" #include "r600_context.h" +#include "r600_cmdbuf.h" #include "r700_chip.h" +#include "r700_tex.h" -/* to be enable #include "r700_vertprog.h" #include "r700_fragprog.h" #include "r700_state.h" -#include "r700_tex.h" -#include "r700_emit.h" -*/ -#if 0 // to be enable void r700WaitForIdle(context_t *context) { - R700EP3 (context, IT_SET_CONFIG_REG, 1); - R700E32 (context, mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX); - R700E32 (context, 1 << 15); + BATCH_LOCALS(&context->radeon); + BEGIN_BATCH_NO_AUTOSTATE(3); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX); + OUT_BATCH(1 << 15); + + END_BATCH(); + COMMIT_BATCH(); } void r700WaitForIdleClean(context_t *context) { - R700EP3 (context, IT_EVENT_WRITE, 0); - R700E32 (context, 0x16); + BATCH_LOCALS(&context->radeon); + BEGIN_BATCH_NO_AUTOSTATE(5); + + OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); + OUT_BATCH(0x16); - R700EP3 (context, IT_SET_CONFIG_REG, 1); - R700E32 (context, mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX); - R700E32 (context, 1 << 17); + OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX); + OUT_BATCH(1 << 17); + + END_BATCH(); + COMMIT_BATCH(); } static void r700Start3D(context_t *context) { - if (context->screen->chip.type <= CHIP_TYPE_RV670) + BATCH_LOCALS(&context->radeon); + if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670) { - R700EP3 (context, IT_START_3D_CMDBUF, 1); - R700E32 (context, 0); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 1)); + OUT_BATCH(0); + END_BATCH(); } - R700EP3 (context, IT_CONTEXT_CONTROL, 1); - R700E32 (context, 0x80000000); - R700E32 (context, 0x80000000); - r700WaitForIdleClean(context); -} + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1)); + OUT_BATCH(0x80000000); + OUT_BATCH(0x80000000); + END_BATCH(); + COMMIT_BATCH(); -static int r700SetupStreams(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; - - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - unsigned int unBit; - unsigned int i; - - R700_CMDBUF_CHECK_SPACE(6); - R700EP3 (context, IT_SET_CTL_CONST, 1); - R700E32 (context, mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); - R700E32 (context, 0); - - R700EP3 (context, IT_SET_CTL_CONST, 1); - R700E32 (context, mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); - R700E32 (context, 0); - - context->aos_count = 0; - for(i=0; imesa_program.Base.InputsRead & unBit) - { - (context->chipobj.EmitVec)(ctx, - &(context->aos[context->aos_count]), - vb->AttribPtr[i]->data, - vb->AttribPtr[i]->size, - vb->AttribPtr[i]->stride, - vb->Count); - - context->aos[context->aos_count].aos_size = vb->AttribPtr[i]->size; - - /* currently aos are packed */ - r700SetupVTXConstans(ctx, - i, - (unsigned int)context->aos[context->aos_count].aos_offset, - (unsigned int)vb->AttribPtr[i]->size, - (unsigned int)(vb->AttribPtr[i]->size * 4), - (unsigned int)vb->Count); - /* TODO : enable this after MemUse fixed *= - (context->chipobj.MemUse)(context, context->aos[context->aos_count].buf->id); - */ - - context->aos_count++; - } - } - for(i=context->aos_count; iaos[i].buf = NULL; - } - - return R600_FALLBACK_NONE; + r700WaitForIdleClean(context); } static GLboolean r700SetupShaders(GLcontext * ctx) @@ -177,7 +134,7 @@ GLboolean r700SendTextureState(context_t *context) unsigned int i; R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); - +#if 0 /* to be enabled */ for(i=0; itexture_states.textures[i] != 0) @@ -204,12 +161,15 @@ GLboolean r700SendTextureState(context_t *context) R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER2.u32All); } } - +#endif return GL_TRUE; } GLboolean r700SyncSurf(context_t *context) { +#if 0 //to be enabled + BATCH_LOCALS(&context->radeon); + /* TODO : too heavy? */ unsigned int CP_COHER_CNTL = 0; @@ -221,23 +181,20 @@ GLboolean r700SyncSurf(context_t *context) |SMX_ACTION_ENA_bit; - R700_CMDBUF_CHECK_SPACE(5); - R700EP3(context, IT_SURFACE_SYNC, 3); - R700E32(context, CP_COHER_CNTL); - R700E32(context, 0xFFFFFFFF); - R700E32(context, 0x00000000); - R700E32(context, 10); + BEGIN_BATCH_NO_AUTOSTATE(5); + + OUT_BATCH(CP_PACKET3((IT_SURFACE_SYNC << 8), 3))); + OUT_BATCH(CP_COHER_CNTL); + OUT_BATCH(0xFFFFFFFF); + OUT_BATCH(0x00000000); + OUT_BATCH(10); + END_BATCH(); + COMMIT_BATCH(); +#endif return GL_TRUE; } -static void r700SetRenderTarget(context_t *context) -{ - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); - - r700->CB_COLOR0_BASE.u32All = context->target.rt.gpu >> 8; -} - unsigned int r700PrimitiveType(int prim) { switch (prim & PRIM_MODE_MASK) @@ -279,21 +236,21 @@ unsigned int r700PrimitiveType(int prim) } } -#endif // to be enable - static GLboolean r700RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { -#if 0 // to be enable context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + + BATCH_LOCALS(&context->radeon); + unsigned int i, j; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; struct r700_fragment_program *fp = (struct r700_fragment_program *) (ctx->FragmentProgram._Current); - if (context->screen->chip.type <= CHIP_TYPE_RV670) + if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670) { fp->r700AsmCode.bR6xx = 1; } @@ -329,7 +286,7 @@ static GLboolean r700RunRender(GLcontext * ctx, r700->SQ_PGM_START_ES.u32All = r700->SQ_PGM_START_PS.u32All; r700->SQ_PGM_START_GS.u32All = r700->SQ_PGM_START_PS.u32All; - r700SendContextStates(context); + r700SendContextStates(context, NULL, NULL); /* richard test code */ for (i = 0; i < vb->PrimitiveCount; i++) @@ -351,51 +308,55 @@ static GLboolean r700RunRender(GLcontext * ctx, + numIndices + 3 /* DRAW_INDEX_IMMD */ + 2; /* test stamp */ - R700_CMDBUF_CHECK_SPACE(numEntires); + BEGIN_BATCH_NO_AUTOSTATE(numEntires); VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift; - R700EP3(context, IT_INDEX_TYPE, 0); - R700E32(context, VGT_INDEX_TYPE); + OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + OUT_BATCH(VGT_INDEX_TYPE); VGT_NUM_INDICES = numIndices; VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift; - R700EP3(context, IT_SET_CONFIG_REG, 1); - R700E32(context, mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R700E32(context, VGT_PRIMITIVE_TYPE); + OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + OUT_BATCH(VGT_PRIMITIVE_TYPE); VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift; VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift; - R700EP3(context, IT_DRAW_INDEX_IMMD, (numIndices + 1)); - R700E32(context, VGT_NUM_INDICES); - R700E32(context, VGT_DRAW_INITIATOR); + OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1))); + OUT_BATCH(VGT_NUM_INDICES); + OUT_BATCH(VGT_DRAW_INITIATOR); for (j=0; jchipobj.FlushCmdBuffer)(context); - /* free aos => TODO : cache mgr */ - for (i = 0; i < context->aos_count; i++) - { - (context->chipobj.FreeDmaRegion)(context, &(context->aos[i])); - } -#endif // to be enable + (context->chipobj.ReleaseArrays)(ctx); + + //richard test + /* test stamp, write a number to mmSCRATCH4 */ + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL((0x2144 << 2), 0x56785678); + END_BATCH(); + COMMIT_BATCH(); + + rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ ); + return GL_FALSE; } @@ -412,11 +373,19 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ { GLboolean bRet = GL_FALSE; -#if 0 // to be enable + /* TODO : sw fallback */ + + /** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. + */ + if(!r700ValidateBuffers(ctx)) + { + return GL_TRUE; + } + context_t *context = R700_CONTEXT(ctx); r700UpdateShaders(ctx); -#endif // to be enable bRet = r700RunRender(ctx, stage); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 4341f33665..a31a89627c 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -52,10 +52,9 @@ #include "r700_chip.h" #include "r700_state.h" -#if 0 /* to be enabled */ #include "r700_fragprog.h" #include "r700_vertprog.h" -#endif /* to be enabled */ + void r700SetDefaultStates(context_t *context) //-------------------- { @@ -68,13 +67,13 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; -#if 0 /* to be enabled */ + struct r700_vertex_program *vp; int i; - if (context->NewGLState) + if (context->radeon.NewGLState) { - context->NewGLState = 0; + context->radeon.NewGLState = 0; for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { @@ -108,7 +107,6 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- } r700UpdateStateParameters(ctx, _NEW_PROGRAM); -#endif /* to be enabled */ } /* @@ -116,6 +114,7 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- */ void r700UpdateViewportOffset(GLcontext * ctx) //------------------ { + return; } /** @@ -152,7 +151,6 @@ static void r700FetchStateParameter(GLcontext * ctx, void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //-------------------- { -#if 0 /* to be enabled */ struct r700_fragment_program *fp; struct gl_program_parameter_list *paramList; GLuint i; @@ -183,7 +181,6 @@ void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //------------ paramList->ParameterValues[i]); } } -#endif /* to be enabled */ } /** @@ -191,7 +188,6 @@ void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //------------ */ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //------------------- { -#if 0 /* to be enabled */ context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); @@ -204,12 +200,16 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + r700UpdateDrawBuffer(ctx); } r700UpdateStateParameters(ctx, new_state); - if(GL_TRUE == context->bEnablePerspective) + if(GL_TRUE == r700->bEnablePerspective) { /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */ CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); @@ -232,8 +232,7 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit); } - context->NewGLState |= new_state; -#endif /* to be enabled */ + context->radeon.NewGLState |= new_state; } static void r700SetDepthState(GLcontext * ctx) @@ -492,17 +491,11 @@ static void r700Viewport(GLcontext * ctx, GLsizei width, GLsizei height) //-------------------- { -#if 0 /* to be enabled */ context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); - context->vport_x = x; - context->vport_y = y; - context->vport_width = width; - context->vport_height= height; - - __DRIdrawablePrivate *dPriv = context->currentDraw; + __DRIdrawablePrivate *dPriv = context->radeon.dri.drawable; GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; @@ -534,12 +527,12 @@ static void r700Viewport(GLcontext * ctx, tz = v[MAT_TZ] * scale; /* TODO : Need DMA flush as well. */ - +#if 0 /* to be enabled */ if(context->cmdbuf.count_used > 0) { (context->chipobj.FlushCmdBuffer)(context); } - +#endif /* to be enabled */ r700->PA_CL_VPORT_XSCALE.u32All = *((unsigned int*)(&sx)); r700->PA_CL_VPORT_XOFFSET.u32All = *((unsigned int*)(&tx)); @@ -548,7 +541,6 @@ static void r700Viewport(GLcontext * ctx, r700->PA_CL_VPORT_ZSCALE.u32All = *((unsigned int*)(&sz)); r700->PA_CL_VPORT_ZOFFSET.u32All = *((unsigned int*)(&tz)); -#endif /* to be enabled */ } @@ -600,60 +592,47 @@ static void r700Scissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) } } - -/** - * Calculate initial hardware state and register state functions. - * Assumes that the command buffer and state atoms have been - * initialized already. - */ -void r700InitState(GLcontext * ctx) //------------------- +void r700SetRenderTarget(context_t *context) { -#if 0 /* to be enabled */ - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); - - if(context->ctx->Visual.doubleBufferMode && context->sarea->pfCurrentPage == 0) - { - context->target.rt = context->screen->backBuffer; - } - else - { - context->target.rt = context->screen->frontBuffer; - } + struct radeon_renderbuffer *rrb; + unsigned int nPitchInPixel; + + /* screen/window/view */ SETfield(r700->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); SETfield(r700->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); /* screen */ r700->PA_SC_SCREEN_SCISSOR_TL.u32All = 0x0; - SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, context->screen->width, + + SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, ((RADEONDRIPtr)(context->radeon.radeonScreen->driScreen->pDevPriv))->width, PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask); - SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, context->screen->height, + SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, ((RADEONDRIPtr)(context->radeon.radeonScreen->driScreen->pDevPriv))->height, PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask); /* window */ SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); - SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->currentDraw->x, + SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->radeon.dri.drawable->x, PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, PA_SC_WINDOW_SCISSOR_TL__TL_X_mask); - SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->currentDraw->y, + SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, context->radeon.dri.drawable->y, PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask); - SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->currentDraw->x + context->currentDraw->w, + SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->radeon.dri.drawable->x + context->radeon.dri.drawable->w, PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, PA_SC_WINDOW_SCISSOR_BR__BR_X_mask); - SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->currentDraw->y + context->currentDraw->h, + SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, context->radeon.dri.drawable->y + context->radeon.dri.drawable->h, PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask); /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */ r700->PA_SC_CLIPRECT_RULE.u32All = 0x0000FFFF; - SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->currentDraw->x, + SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->radeon.dri.drawable->x, PA_SC_CLIPRECT_0_TL__TL_X_shift, PA_SC_CLIPRECT_0_TL__TL_X_mask); - SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->currentDraw->y, + SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, context->radeon.dri.drawable->y, PA_SC_CLIPRECT_0_TL__TL_Y_shift, PA_SC_CLIPRECT_0_TL__TL_Y_mask); - SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->currentDraw->x + context->currentDraw->w, + SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->radeon.dri.drawable->x + context->radeon.dri.drawable->w, PA_SC_CLIPRECT_0_BR__BR_X_shift, PA_SC_CLIPRECT_0_BR__BR_X_mask); - SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->currentDraw->y + context->currentDraw->h, + SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, context->radeon.dri.drawable->y + context->radeon.dri.drawable->h, PA_SC_CLIPRECT_0_BR__BR_Y_shift, PA_SC_CLIPRECT_0_BR__BR_Y_mask); r700->PA_SC_CLIPRECT_1_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All; @@ -665,42 +644,138 @@ void r700InitState(GLcontext * ctx) //------------------- /* more....2d clip */ SETbit(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit); - SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->currentDraw->x, + SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->radeon.dri.drawable->x, PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, PA_SC_GENERIC_SCISSOR_TL__TL_X_mask); - SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->currentDraw->y, + SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, context->radeon.dri.drawable->y, PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask); - SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->currentDraw->x + context->currentDraw->w, + SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->radeon.dri.drawable->x + context->radeon.dri.drawable->w, PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, PA_SC_GENERIC_SCISSOR_BR__BR_X_mask); - SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->currentDraw->y + context->currentDraw->h, + SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, context->radeon.dri.drawable->y + context->radeon.dri.drawable->h, PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask); SETbit(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit); - SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->currentDraw->x, + SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->radeon.dri.drawable->x, PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask); - SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->currentDraw->y, + SETfield(r700->PA_SC_VPORT_SCISSOR_0_TL.u32All, context->radeon.dri.drawable->y, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask); - SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->currentDraw->x + context->currentDraw->w, + SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->radeon.dri.drawable->x + context->radeon.dri.drawable->w, PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask); - SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->currentDraw->y + context->currentDraw->h, + SETfield(r700->PA_SC_VPORT_SCISSOR_0_BR.u32All, context->radeon.dri.drawable->y + context->radeon.dri.drawable->h, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); SETbit(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, WINDOW_OFFSET_DISABLE_bit); - SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->currentDraw->x, + SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->radeon.dri.drawable->x, PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask); - SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->currentDraw->y, + SETfield(r700->PA_SC_VPORT_SCISSOR_1_TL.u32All, context->radeon.dri.drawable->y, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask); - SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->currentDraw->x + context->currentDraw->w, + SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->radeon.dri.drawable->x + context->radeon.dri.drawable->w, PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask); - SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->currentDraw->y + context->currentDraw->h, + SETfield(r700->PA_SC_VPORT_SCISSOR_1_BR.u32All, context->radeon.dri.drawable->y + context->radeon.dri.drawable->h, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); /* setup viewport */ - r700Viewport(ctx, + r700Viewport(GL_CONTEXT(context), 0, 0, - context->currentDraw->w, - context->currentDraw->h); + context->radeon.dri.drawable->w, + context->radeon.dri.drawable->h); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + + /* color buffer */ + r700->CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset; + nPitchInPixel = rrb->pitch/rrb->cpp; + SETfield(r700->CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(r700->CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * rrb->base.Height)/64 )-1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); + r700->CB_COLOR0_BASE.u32All = 0; + SETfield(r700->CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); + SETfield(r700->CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); + if(4 == rrb->cpp) + { + SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_8_8_8_8, + CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); + SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask); + } + else + { + SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_5_6_5, + CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); + SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT_REV, + COMP_SWAP_shift, COMP_SWAP_mask); + } + SETbit(r700->CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); + SETbit(r700->CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit); + SETfield(r700->CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); + + /* depth buf */ + r700->DB_DEPTH_SIZE.u32All = 0; + r700->DB_DEPTH_BASE.u32All = 0; + r700->DB_DEPTH_INFO.u32All = 0; + + r700->DB_DEPTH_CONTROL.u32All = 0; + r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; + r700->DB_DEPTH_VIEW.u32All = 0; + r700->DB_RENDER_CONTROL.u32All = 0; + r700->DB_RENDER_OVERRIDE.u32All = 0; + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (!rrb) + return; + + nPitchInPixel = rrb->pitch/rrb->cpp; + + SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1, + PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); + SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * rrb->base.Height)/64 )-1, + SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */ + + if(4 == rrb->cpp) + { + switch (GL_CONTEXT(context)->Visual.depthBits) + { + case 16: + case 24: + SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24, + DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", + GL_CONTEXT(context)->Visual.depthBits); + _mesa_exit(-1); + } + } + else + { + SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, + DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); + } + SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, + DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); + /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */ +} + +/** + * Calculate initial hardware state and register state functions. + * Assumes that the command buffer and state atoms have been + * initialized already. + */ +void r700InitState(GLcontext * ctx) //------------------- +{ + context_t *context = R700_CONTEXT(ctx); + + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + /* Turn off vgt reuse */ r700->VGT_REUSE_OFF.u32All = 0; SETbit(r700->VGT_REUSE_OFF.u32All, REUSE_OFF_bit); @@ -740,7 +815,7 @@ void r700InitState(GLcontext * ctx) //------------------- POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); /* Do scale XY and Z by 1/W0. */ - context->bEnablePerspective = GL_TRUE; + r700->bEnablePerspective = GL_TRUE; CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit); CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit); SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit); @@ -811,35 +886,8 @@ void r700InitState(GLcontext * ctx) //------------------- /* depth buf */ r700->DB_DEPTH_SIZE.u32All = 0; - SETfield(r700->DB_DEPTH_SIZE.u32All, (context->screen->depthBuffer.pitch/8)-1, - PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); - SETfield(r700->DB_DEPTH_SIZE.u32All, ( (context->screen->depthBuffer.size / context->screen->cpp)/64 )-1, - SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */ - r700->DB_DEPTH_BASE.u32All = context->screen->depthBuffer.gpu >> 8; + r700->DB_DEPTH_BASE.u32All = 0; r700->DB_DEPTH_INFO.u32All = 0; - if(4 == context->screen->cpp) /* TODO : in scrren create, gives z its own format alloc. */ - { - switch (ctx->Visual.depthBits) - { - case 16: - case 24: - SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24, - DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits); - _mesa_exit(-1); - } - } - else - { - SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, - DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); - } - SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, - DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); - /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */ r700->DB_DEPTH_CONTROL.u32All = 0; r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; r700->DB_DEPTH_VIEW.u32All = 0; @@ -850,27 +898,9 @@ void r700InitState(GLcontext * ctx) //------------------- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); /* color buffer */ - SETfield(r700->CB_COLOR0_SIZE.u32All, (context->screen->frontBuffer.pitch/8)-1, - PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); - SETfield(r700->CB_COLOR0_SIZE.u32All, ( (context->screen->frontBuffer.size / context->screen->cpp)/64 )-1, - SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); - r700->CB_COLOR0_BASE.u32All = context->screen->frontBuffer.gpu >> 8; - SETfield(r700->CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); - SETfield(r700->CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL, - CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); - if(4 == context->screen->cpp) - { - SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_8_8_8_8, - CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); - SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT, COMP_SWAP_shift, COMP_SWAP_mask); - } - else - { - SETfield(r700->CB_COLOR0_INFO.u32All, COLOR_5_6_5, - CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); - SETfield(r700->CB_COLOR0_INFO.u32All, SWAP_ALT_REV, - COMP_SWAP_shift, COMP_SWAP_mask); - } + r700->CB_COLOR0_SIZE.u32All = 0; + r700->CB_COLOR0_BASE.u32All = 0; + r700->CB_COLOR0_INFO.u32All = 0; SETbit(r700->CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); SETbit(r700->CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit); SETfield(r700->CB_COLOR0_INFO.u32All, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); @@ -879,8 +909,7 @@ void r700InitState(GLcontext * ctx) //------------------- r700->CB_COLOR0_FRAG.u32All = 0; r700->CB_COLOR0_MASK.u32All = 0; - r700->PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000; -#endif /* to be enabled */ + r700->PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000; } void r700InitStateFuncs(struct dd_function_table *functions) //----------------- diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index b9542f4dd2..a75c7f0188 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -42,6 +42,7 @@ extern void r700UpdateDrawBuffer (GLcontext * ctx); extern void r700InitState (GLcontext * ctx); extern void r700InitStateFuncs (struct dd_function_table *functions); +extern void r700SetRenderTarget(context_t *context); extern void r700SetDefaultStates(context_t * context); #endif /* _R600_SCREEN_H */ diff --git a/src/mesa/drivers/dri/r600/r700_tex.c b/src/mesa/drivers/dri/r600/r700_tex.c index 75300d2c5f..85352022bc 100644 --- a/src/mesa/drivers/dri/r600/r700_tex.c +++ b/src/mesa/drivers/dri/r600/r700_tex.c @@ -42,9 +42,7 @@ #include "r600_context.h" #include "r700_chip.h" -#if 0 /* to be enabled */ #include "r700_state.h" -#endif /* to be enabled */ #include "r700_tex.h" @@ -72,7 +70,6 @@ void r700SetTexOffset(__DRIcontext *pDRICtx, GLint texname, { } -#if 0 /* to be enabled */ static GLboolean r700GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format) { r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; @@ -548,7 +545,6 @@ static void compute_tex_image_offset( *curOffset += size; } -#endif /* to be enabled */ void r700DestroyTexObj(context_t context, r700TexObjPtr t) { @@ -563,9 +559,9 @@ void r700DestroyTexObj(context_t context, r700TexObjPtr t) //} } -#if 0 /* to be enabled */ static void r700SetTexImages(context_t *context, struct gl_texture_object *tObj) { +#if 0 /* to be enabled */ r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; GLint curOffset; @@ -668,6 +664,7 @@ static void r700SetTexImages(context_t *context, struct gl_texture_object *tObj) TEX_WIDTH_shift, TEX_WIDTH_mask); SETfield(t->texture_state.SQ_TEX_RESOURCE1.u32All, tObj->Image[0][t->base.firstLevel]->Height - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); +#endif /* to be enabled */ } static void r700UploadSubImage(context_t *context, @@ -677,6 +674,7 @@ static void r700UploadSubImage(context_t *context, GLint y, GLuint face) { +#if 0 /* to be enabled */ struct gl_texture_image *texImage = NULL; GLuint offset; GLint imageWidth, imageHeight; @@ -739,10 +737,12 @@ static void r700UploadSubImage(context_t *context, t->src_hight_in_pexel[face][hwlevel], /* height */ t->byte_per_texel, /* byte_per_pixel */ pSrc); /* source data */ +#endif /* to be enabled */ } int r700UploadTexImages(GLcontext * ctx, struct gl_texture_object *tObj, GLuint face) { +#if 0 /* to be enabled */ context_t *context = R700_CONTEXT(ctx); r700TexObjPtr t = (r700TexObjPtr) tObj->DriverData; @@ -823,7 +823,7 @@ int r700UploadTexImages(GLcontext * ctx, struct gl_texture_object *tObj, GLuint SETfield(t->texture_state.SQ_TEX_RESOURCE4.u32All, t->base.firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask); SETfield(t->texture_state.SQ_TEX_RESOURCE5.u32All, t->base.lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask); } - +#endif /* to be enabled */ return 0; } @@ -889,8 +889,6 @@ static const struct gl_texture_format *r700Choose8888TexFormat(GLenum srcFormat, return gtfRet; } -#endif /* to be enabled */ - static r700TexObjPtr r700AllocTexObj(struct gl_texture_object *texObj) { r700TexObjPtr t; @@ -1527,6 +1525,16 @@ void r700UpdateTextureState(context_t * context) #endif /* to be enabled */ } +/** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. + */ +GLboolean r700ValidateBuffers(GLcontext * ctx) +{ + /* TODO */ + + return radeon_revalidate_bos(ctx); +} + void r700InitTextureFuncs(struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver diff --git a/src/mesa/drivers/dri/r600/r700_tex.h b/src/mesa/drivers/dri/r600/r700_tex.h index 13290578a1..e322bbc47d 100644 --- a/src/mesa/drivers/dri/r600/r700_tex.h +++ b/src/mesa/drivers/dri/r600/r700_tex.h @@ -97,6 +97,8 @@ extern void r700SetTexOffset(__DRIcontext *pDRICtx, extern void r700DestroyTexObj(context_t rmesa, r700TexObjPtr t); +extern GLboolean r700ValidateBuffers(GLcontext * ctx); + extern void r700InitTextureFuncs(struct dd_function_table *functions); #endif /* __r700_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 22a12a00d1..f0dfbe20f0 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -39,15 +39,12 @@ #include "shader/prog_statevars.h" #include "r600_context.h" +#include "r600_cmdbuf.h" #include "r700_chip.h" #include "r700_debug.h" #include "r700_vertprog.h" -#if 0 /* to be enabled */ -#include "r700_emit.h" -#endif - unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, struct gl_vertex_program *mesa_vp, unsigned int unStart) @@ -297,11 +294,10 @@ GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, void r700SelectVertexShader(GLcontext *ctx) { -#if 0 /* to be enabled */ context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vpc = (struct r700_vertex_program *)ctx->VertexProgram._Current; - if (context->screen->chip.type <= CHIP_TYPE_RV670) + if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670) { vpc->r700AsmCode.bR6xx = 1; } @@ -327,56 +323,22 @@ void r700SelectVertexShader(GLcontext *ctx) r700TranslateVertexShader(vpc, &(vpc->mesa_program) ); } -#endif /* to be enabled */ } -void r700SetupVTXConstans(GLcontext * ctx, - unsigned int nStreamID, - unsigned int aos_offset, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int count) /* number of vectors in stream */ +void * r700GetActiveVpShaderBo(GLcontext * ctx) { - context_t *context = R700_CONTEXT(ctx); - uint32_t *dest; - - unsigned int uSQ_VTX_CONSTANT_WORD0_0; - unsigned int uSQ_VTX_CONSTANT_WORD1_0; - unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; - unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; - unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; - - uSQ_VTX_CONSTANT_WORD0_0 = aos_offset; - uSQ_VTX_CONSTANT_WORD1_0 = count * stride - 1; - - uSQ_VTX_CONSTANT_WORD2_0 |= 0 << BASE_ADDRESS_HI_shift /* TODO */ - |stride << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift - |GetSurfaceFormat(GL_FLOAT, size, NULL) << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift /* TODO : trace back api for initial data type, not only GL_FLOAT */ - |SQ_NUM_FORMAT_SCALED << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift - |SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit; - - uSQ_VTX_CONSTANT_WORD3_0 |= 1 << MEM_REQUEST_SIZE_shift; - - uSQ_VTX_CONSTANT_WORD6_0 |= SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift; -#if 0 /* to be enabled */ - R700_CMDBUF_CHECK_SPACE(9); - R700EP3 (context, IT_SET_RESOURCE, 7); - R700E32 (context, (nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); - - R700E32 (context, uSQ_VTX_CONSTANT_WORD0_0); - R700E32 (context, uSQ_VTX_CONSTANT_WORD1_0); - R700E32 (context, uSQ_VTX_CONSTANT_WORD2_0); - R700E32 (context, uSQ_VTX_CONSTANT_WORD3_0); - R700E32 (context, 0); - R700E32 (context, 0); - R700E32 (context, uSQ_VTX_CONSTANT_WORD6_0); -#endif /* to be enabled */ + struct r700_vertex_program *vp + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + return vp->shaderbo; } GLboolean r700SetupVertexProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); struct r700_vertex_program *vp @@ -396,7 +358,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) /* Load vp to gpu */ (context->chipobj.EmitShader)(ctx, - &(vp->shadercode), + &(vp->shaderbo), (GLvoid *)(vp->r700Shader.pProgram), vp->r700Shader.uShaderBinaryDWORDSize); @@ -410,7 +372,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) (context->chipobj.MemUse)(context, vp->shadercode.buf->id); */ - r700->SQ_PGM_START_VS.u32All = (vp->shadercode.aos_offset >> 8) & 0x00FFFFFF; + r700->SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */ SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, NUM_GPRS_shift, NUM_GPRS_mask); @@ -438,24 +400,26 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) if(NULL != paramList) { _mesa_load_state_parameters(ctx, paramList); -#if 0 /* to be enabled */ + unNumParamData = paramList->NumParameters * 4; - R700_CMDBUF_CHECK_SPACE(unNumParamData + 2); - R700EP3 (context, IT_SET_ALU_CONST, unNumParamData); + BEGIN_BATCH_NO_AUTOSTATE(unNumParamData + 2); + + OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData)); /* assembler map const from very beginning. */ - R700E32 (context, SQ_ALU_CONSTANT_VS_OFFSET * 4); + OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4); unNumParamData = paramList->NumParameters; for(ui=0; uiParameterValues[ui][0]))); - R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][1]))); - R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][2]))); - R700E32 (context, *((unsigned int*)&(paramList->ParameterValues[ui][3]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2]))); + OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3]))); } -#endif /* to be enabled */ + END_BATCH(); + COMMIT_BATCH(); } return GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index 0822335c10..6a9726a3d0 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -55,7 +55,8 @@ struct r700_vertex_program /* ... */ - struct r600_dma_region shadercode; + void * shaderbo; + ArrayDesc aos_desc[VERT_ATTRIB_MAX]; }; @@ -82,11 +83,6 @@ extern void r700SelectVertexShader(GLcontext *ctx); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); -extern void r700SetupVTXConstans(GLcontext * ctx, - unsigned int nStreamID, - unsigned int aos_offset, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int Count); /* number of vectors in stream */ +extern void * r700GetActiveVpShaderBo(GLcontext * ctx); #endif /* _R700_VERTPROG_H_ */ -- cgit v1.2.3