diff options
Diffstat (limited to 'src/mesa')
105 files changed, 3303 insertions, 505 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index bac1c3a49c..016f27a6a3 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -295,8 +295,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), &brw->cc.vp_bo, 1, - &cc, sizeof(cc), - NULL, NULL); + &cc, sizeof(cc)); /* Emit CC viewport relocation */ dri_bo_emit_reloc(bo, diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index af1d975de9..d3275c7a89 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -130,13 +130,14 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache( &brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->clip.prog_data ); + brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->clip.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index c8f24a94e4..22df7722b6 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -143,8 +143,7 @@ clip_unit_create_from_key(struct brw_context *brw, bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, key, sizeof(*key), &brw->clip.prog_bo, 1, - &clip, sizeof(clip), - NULL, NULL); + &clip, sizeof(clip)); /* Emit clip program relocation */ assert(brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0dd3087143..79818b92b7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,7 +131,6 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -332,7 +331,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; /* Record of the last BOs chosen for each cache_id. 
Used to set @@ -583,6 +581,7 @@ struct brw_context struct { struct brw_vs_prog_data *prog_data; + int8_t *constant_map; /* variable array following prog_data */ dri_bo *prog_bo; dri_bo *state_bo; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 190310afbb..22e3e732f4 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -256,13 +256,24 @@ static void prepare_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - /* XXX just use a memcpy here */ - for (i = 0; i < nr; i++) { - const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; - buf[offset + i * 4 + 0] = value[0]; - buf[offset + i * 4 + 1] = value[1]; - buf[offset + i * 4 + 2] = value[2]; - buf[offset + i * 4 + 3] = value[3]; + if (vp->use_const_buffer) { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } + } + } else { + for (i = 0; i < nr; i++) { + memcpy(buf + offset + i * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 1bc3eccf49..7261b316c1 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->gs.prog_data ); + brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG, + 
&c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->gs.prog_data); } static const GLenum gs_prim[GL_POLYGON+1] = { diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 1af5790a67..7d5a944bf7 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -108,8 +108,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), &brw->gs.prog_bo, 1, - &gs, sizeof(gs), - NULL, NULL); + &gs, sizeof(gs)); if (key->prog_active) { /* Emit GS program relocation */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 968890f7fb..8e6839b812 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ dri_bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->sf.prog_data ); + brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + sizeof(c.prog_data), + &brw->sf.prog_data); } /* Calculate interpolants for triangle and line rasterization. 
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 09223b7cfb..b9b42cd6d5 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -309,8 +309,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + &sf, sizeof(sf)); /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 9c9d145c4b..536fe8b249 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -124,16 +124,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); +drm_intel_bo *brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz); + +drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + GLuint aux_sz, + void *aux_return); dri_bo *brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index e4c9ba7d87..5fc47b0420 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -71,25 +71,23 
@@ static GLuint -hash_key(const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +hash_key(struct brw_cache_item *item) { - GLuint *ikey = (GLuint *)key; - GLuint hash = 0, i; + GLuint *ikey = (GLuint *)item->key; + GLuint hash = item->cache_id, i; - assert(key_size % 4 == 0); + assert(item->key_size % 4 == 0); /* I'm sure this can be improved on: */ - for (i = 0; i < key_size/4; i++) { + for (i = 0; i < item->key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } /* Include the BO pointers as key data as well */ - ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(dri_bo *); - for (i = 0; i < key_size/4; i++) { + ikey = (GLuint *)item->reloc_bufs; + for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); } @@ -114,11 +112,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } +static int +brw_cache_item_equals(const struct brw_cache_item *a, + const struct brw_cache_item *b) +{ + return a->cache_id == b->cache_id && + a->hash == b->hash && + a->key_size == b->key_size && + (memcmp(a->key, b->key, a->key_size) == 0) && + a->nr_reloc_bufs == b->nr_reloc_bufs && + (memcmp(a->reloc_bufs, b->reloc_bufs, + a->nr_reloc_bufs * sizeof(dri_bo *)) == 0); +} static struct brw_cache_item * -search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, - GLuint hash, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) +search_cache(struct brw_cache *cache, GLuint hash, + struct brw_cache_item *lookup) { struct brw_cache_item *c; @@ -133,13 +142,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, #endif for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->cache_id == cache_id && - c->hash == hash && - c->key_size == key_size && - memcmp(c->key, key, key_size) == 0 && - c->nr_reloc_bufs == nr_reloc_bufs && - 
memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(dri_bo *)) == 0) + if (brw_cache_item_equals(lookup, c)) return c; } @@ -182,10 +185,18 @@ brw_search_cache(struct brw_cache *cache, void *aux_return) { struct brw_cache_item *item; - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + struct brw_cache_item lookup; + GLuint hash; - item = search_cache(cache, cache_id, hash, key, key_size, - reloc_bufs, nr_reloc_bufs); + lookup.cache_id = cache_id; + lookup.key = key; + lookup.key_size = key_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item == NULL) return NULL; @@ -200,26 +211,34 @@ brw_search_cache(struct brw_cache *cache, } -dri_bo * -brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) +drm_intel_bo * +brw_upload_cache_with_auxdata(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + GLuint aux_size, + void *aux_return) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint hash; GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); - GLuint aux_size = cache->aux_size[cache_id]; void *tmp; dri_bo *bo; int i; + item->cache_id = cache_id; + item->key = key; + item->key_size = key_size; + item->reloc_bufs = reloc_bufs; + item->nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(item); + item->hash = hash; + /* Create the buffer object to contain the data */ bo = dri_bo_alloc(cache->brw->intel.bufmgr, cache->name[cache_id], data_size, 1 << 6); @@ -229,19 +248,15 @@ brw_upload_cache( struct brw_cache 
*cache, tmp = _mesa_malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size, aux, aux_size); memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) dri_bo_reference(reloc_bufs[i]); } - item->cache_id = cache_id; item->key = tmp; - item->hash = hash; - item->key_size = key_size; item->reloc_bufs = tmp + key_size + aux_size; - item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; dri_bo_reference(bo); @@ -255,7 +270,6 @@ brw_upload_cache( struct brw_cache *cache, cache->n_items++; if (aux_return) { - assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } @@ -272,6 +286,23 @@ brw_upload_cache( struct brw_cache *cache, return bo; } +drm_intel_bo * +brw_upload_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size) +{ + return brw_upload_cache_with_auxdata(cache, cache_id, + key, key_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, 0, + NULL); +} /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. 
@@ -292,11 +323,18 @@ brw_cache_data(struct brw_cache *cache, GLuint nr_reloc_bufs) { dri_bo *bo; - struct brw_cache_item *item; - GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - - item = search_cache(cache, cache_id, hash, data, data_size, - reloc_bufs, nr_reloc_bufs); + struct brw_cache_item *item, lookup; + GLuint hash; + + lookup.cache_id = cache_id; + lookup.key = data; + lookup.key_size = data_size; + lookup.reloc_bufs = reloc_bufs; + lookup.nr_reloc_bufs = nr_reloc_bufs; + hash = hash_key(&lookup); + lookup.hash = hash; + + item = search_cache(cache, hash, &lookup); if (item) { update_cache_last(cache, cache_id, item->bo); dri_bo_reference(item->bo); @@ -306,8 +344,7 @@ brw_cache_data(struct brw_cache *cache, bo = brw_upload_cache(cache, cache_id, data, data_size, reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, NULL); + data, data_size); return bo; } @@ -321,11 +358,9 @@ enum pool_type { static void brw_init_cache_id(struct brw_cache *cache, const char *name, - enum brw_cache_id id, - GLuint aux_size) + enum brw_cache_id id) { cache->name[id] = strdup(name); - cache->aux_size[id] = aux_size; } @@ -341,80 +376,28 @@ brw_init_non_surface_cache(struct brw_context *brw) cache->items = (struct brw_cache_item **) _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, - "CC_VP", - BRW_CC_VP, - 0); - - brw_init_cache_id(cache, - "CC_UNIT", - BRW_CC_UNIT, - 0); - - brw_init_cache_id(cache, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_data)); - - brw_init_cache_id(cache, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - 0); - - brw_init_cache_id(cache, - "SAMPLER", - BRW_SAMPLER, - 0); - - brw_init_cache_id(cache, - "WM_UNIT", - BRW_WM_UNIT, - 0); - - brw_init_cache_id(cache, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_data)); - - brw_init_cache_id(cache, - "SF_VP", - BRW_SF_VP, - 0); - - brw_init_cache_id(cache, - "SF_UNIT", - BRW_SF_UNIT, - 0); - - brw_init_cache_id(cache, - 
"VS_UNIT", - BRW_VS_UNIT, - 0); - - brw_init_cache_id(cache, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_data)); - - brw_init_cache_id(cache, - "CLIP_UNIT", - BRW_CLIP_UNIT, - 0); - - brw_init_cache_id(cache, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_data)); - - brw_init_cache_id(cache, - "GS_UNIT", - BRW_GS_UNIT, - 0); - - brw_init_cache_id(cache, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_data)); + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP); + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT); + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG); + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR); + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER); + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT); + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG); + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP); + + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT); + + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT); + + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG); + + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT); + + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG); + + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT); + + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG); } @@ -430,15 +413,8 @@ brw_init_surface_cache(struct brw_context *brw) cache->items = (struct brw_cache_item **) _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - 0); - - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0); + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE); + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index af8dfb4c15..0ecbef1ef9 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -36,13 +36,7 @@ #include "intel_batchbuffer.h" 
#include "intel_buffers.h" -/* This is used to initialize brw->state.atoms[]. We could use this - * list directly except for a single atom, brw_constant_buffer, which - * has a .dirty value which changes according to the parameters of the - * current fragment and vertex programs, and so cannot be a static - * value. - */ -const struct brw_tracked_state *atoms[] = +static const struct brw_tracked_state *atoms[] = { &brw_check_fallback, @@ -208,7 +202,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index fd055e225e..44b085e214 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -35,6 +35,7 @@ #include "brw_util.h" #include "brw_state.h" #include "shader/prog_print.h" +#include "shader/prog_parameter.h" @@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw, struct brw_vertex_program *vp, struct brw_vs_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; GLuint program_size; const GLuint *program; struct brw_vs_compile c; + int aux_size; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + /* We upload from &c.prog_data including the constant_map assuming + * they're packed together. It would be nice to have a + * compile-time assert macro here. 
+ */ + assert(c.constant_map == (int8_t *)&c.prog_data + + sizeof(c.prog_data)); + assert(ctx->Const.VertexProgram.MaxNativeParameters == + ARRAY_SIZE(c.constant_map)); + + aux_size = sizeof(c.prog_data); + if (c.vp->use_const_buffer) + aux_size += c.vp->program.Base.Parameters->NumParameters; + dri_bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->vs.prog_data ); + brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + aux_size, + &brw->vs.prog_data); } @@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); + brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + + sizeof(*brw->vs.prog_data)); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 4a591365c9..95e0501b1e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -51,6 +51,7 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; + int8_t constant_map[1024]; struct brw_vertex_program *vp; @@ -81,6 +82,8 @@ struct brw_vs_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLboolean needs_stack; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 1b84dd505f..52cc04fee8 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -104,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ if (c->vp->use_const_buffer) { - /* get constants from a real constant buffer */ - c->prog_data.curb_read_length = 0; - c->prog_data.nr_params = 4; /* XXX 0 
causes a bug elsewhere... */ + int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + int constant = 0; + + /* We've got more constants than we can load with the push + * mechanism. This is often correlated with reladdr loads where + * we should probably be using a pull mechanism anyway to avoid + * excessive reading. However, the pull mechanism is slow in + * general. So, we try to allocate as many non-reladdr-loaded + * constants through the push buffer as we can before giving up. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || + inst->SrcReg[arg].RelAddr) + continue; + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } + } + } + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, + (i%2) * 4), + 0, 4, 1); + } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + /* XXX 0 causes a bug elsewhere... 
*/ + c->prog_data.nr_params = MAX2(constant * 4, 4); } else { /* use a section of the GRF for constants */ @@ -214,8 +252,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } } - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; + if (c->needs_stack) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } /* Some opcodes need an internal temporary: */ @@ -762,15 +802,14 @@ get_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; - struct brw_reg const_reg; - struct brw_reg const2_reg; - const GLboolean relAddr = src->RelAddr; + struct brw_reg const_reg = c->current_const[argIndex].reg; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || relAddr) { + if (c->current_const[argIndex].index != src->Index) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; #if 0 @@ -779,48 +818,74 @@ get_constant(struct brw_vs_compile *c, #endif /* need to fetch the constant now */ brw_dp_READ_4_vs(p, - c->current_const[argIndex].reg,/* writeback dest */ + const_reg, /* writeback dest */ 0, /* oword */ - relAddr, /* relative indexing? */ + 0, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - - if (relAddr) { - /* second read */ - const2_reg = get_tmp(c); - - /* use upper half of address reg for second read */ - addrReg = stride(addrReg, 0, 4, 0); - addrReg.subnr = 16; - - brw_dp_READ_4_vs(p, - const2_reg, /* writeback dest */ - 1, /* oword */ - relAddr, /* relative indexing? 
*/ - addrReg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER - ); - } } - const_reg = c->current_const[argIndex].reg; + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; - if (relAddr) { - /* merge the two Owords into the constant register */ - /* const_reg[7..4] = const2_reg[7..4] */ - brw_MOV(p, - suboffset(stride(const_reg, 0, 4, 1), 4), - suboffset(stride(const2_reg, 0, 4, 1), 4)); - release_tmp(c, const2_reg); - } - else { - /* replicate lower four floats into upper half (to get XYZWXYZW) */ - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; - } + return const_reg; +} + +static struct brw_reg +get_reladdr_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg = c->current_const[argIndex].reg; + struct brw_reg const2_reg; + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + assert(argIndex < 3); + + /* Can't reuse a reladdr constant load. */ + c->current_const[argIndex].index = -1; + + #if 0 + printf(" fetch const[a0.x+%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + + /* fetch the first vec4 */ + brw_dp_READ_4_vs(p, + const_reg, /* writeback dest */ + 0, /* oword */ + 1, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + /* second vec4 */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + 1, /* relative indexing? 
*/ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); return const_reg; } @@ -928,7 +993,13 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: if (c->vp->use_const_buffer) { - return get_constant(c, inst, argIndex); + if (!relAddr && c->constant_map[index] != -1) { + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else if (relAddr) + return get_reladdr_constant(c, inst, argIndex); + else + return get_constant(c, inst, argIndex); } else if (relAddr) { return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); @@ -1380,12 +1451,14 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { GLuint i; struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + + /* Message registers can't be read, so copy the output into GRF + * register if they are used in source registers + */ for (i = 0; i < 3; i++) { struct prog_src_register *src = &inst->SrcReg[i]; GLuint index = src->Index; @@ -1393,12 +1466,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) c->output_regs[index].used_in_src = GL_TRUE; } + + switch (inst->Opcode) { + case OPCODE_CAL: + case OPCODE_RET: + c->needs_stack = GL_TRUE; + break; + default: + break; + } } /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + if 
(c->needs_stack) + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 345ffa7ee1..fd9f2fee42 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -164,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, key, sizeof(*key), &brw->vs.prog_bo, 1, - &vs, sizeof(vs), - NULL, NULL); + &vs, sizeof(vs)); /* Emit VS program relocation */ dri_bo_emit_reloc(bo, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 3bc9840a97..3f6e16fcb0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -168,8 +168,7 @@ brw_vs_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_VS_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 72749b3859..bb7a293812 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -172,12 +172,6 @@ static void brw_new_batch( struct intel_context *intel ) } } - -static void brw_note_fence( struct intel_context *intel, GLuint fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; -} - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -193,7 +187,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.update_texture_state = 0; brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; 
brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 6895f64410..fb24379c90 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -199,12 +199,13 @@ static void do_wm_prog( struct brw_context *brw, program = brw_get_program(&c->func, &program_size); dri_bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - &brw->wm.prog_data ); + brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + sizeof(c->prog_data), + &brw->wm.prog_data); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index ad267a4e6a..87387b1e2d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -326,8 +326,7 @@ static void upload_wm_samplers( struct brw_context *brw ) brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler), - NULL, NULL); + &sampler, sizeof(sampler)); /* Emit SDC relocations */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index d3373ea79e..a7f80db554 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -210,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, key, sizeof(*key), reloc_bufs, 3, - &wm, sizeof(wm), - NULL, NULL); + &wm, sizeof(wm)); /* Emit WM program relocation */ dri_bo_emit_reloc(bo, diff --git 
a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f26cfabb7d..357c8c90de 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -256,8 +256,7 @@ brw_create_texture_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (key->bo) { /* Emit relocation to surface contents */ @@ -351,8 +350,7 @@ brw_create_constant_surface( struct brw_context *brw, bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (key->bo) { /* Emit relocation to surface contents. Section 5.1.1 of the gen4 @@ -653,8 +651,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, - &surf, sizeof(surf), - NULL, NULL); + &surf, sizeof(surf)); if (region_bo != NULL) { /* We might sample from it, and we might render to it, so flag * them both. 
We might be able to figure out from other state @@ -701,8 +698,7 @@ brw_wm_get_binding_table(struct brw_context *brw) bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size, - NULL, NULL); + data, data_size); /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_WM_MAX_SURF; i++) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 3f6634c65a..d52fe2eef2 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -506,27 +506,7 @@ intelFlush(GLcontext * ctx) static void intel_glFlush(GLcontext *ctx) { - struct intel_context *intel = intel_context(ctx); - intel_flush(ctx, GL_TRUE); - - /* We're using glFlush as an indicator that a frame is done, which is - * what DRI2 does before calling SwapBuffers (and means we should catch - * people doing front-buffer rendering, as well).. - * - * Wait for the swapbuffers before the one we just emitted, so we don't - * get too many swaps outstanding for apps that are GPU-heavy but not - * CPU-heavy. - * - * Unfortunately, we don't have a handle to the batch containing the swap, - * and getting our hands on that doesn't seem worth it, so we just us the - * first batch we emitted after the last swap. 
- */ - if (intel->first_post_swapbuffers_batch != NULL) { - drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; - } } void diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 07207bfbec..6ba281cc14 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -107,7 +107,6 @@ struct intel_context void (*finish_batch) (struct intel_context * intel); void (*new_batch) (struct intel_context * intel); void (*emit_invarient_state) (struct intel_context * intel); - void (*note_fence) (struct intel_context *intel, GLuint fence); void (*update_texture_state) (struct intel_context * intel); void (*render_start) (struct intel_context * intel); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e240957197..6c2cb3b57e 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -128,8 +128,29 @@ intelDRI2Flush(__DRIdrawable *drawable) static void intelDRI2FlushInvalidate(__DRIdrawable *drawable) { + struct intel_context *intel = drawable->driContextPriv->driverPrivate; + intelDRI2Flush(drawable); drawable->validBuffers = GL_FALSE; + + /* We're using FlushInvalidate as an indicator that a frame is + * done. It's only called immediately after SwapBuffers, so it + * won't affect front-buffer rendering or applications explicitly + * managing swap regions using MESA_copy_buffer. + * + * Wait for the swapbuffers before the one we just emitted, so we don't + * get too many swaps outstanding for apps that are GPU-heavy but not + * CPU-heavy. + * + * Unfortunately, we don't have a handle to the batch containing the swap, + * and getting our hands on that doesn't seem worth it, so we just use the + * first batch we emitted after the last swap. 
+ */ + if (intel->first_post_swapbuffers_batch != NULL) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + } } static const struct __DRI2flushExtensionRec intelFlushExtension = { diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index 8212dc1203..ca33faff87 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -29,8 +29,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_queryobj.c \ radeon_span.c \ - radeon_texture.c - + radeon_texture.c \ + radeon_tex_copy.c DRIVER_SOURCES = r200_context.c \ r200_ioctl.c \ @@ -46,6 +46,7 @@ DRIVER_SOURCES = r200_context.c \ r200_sanity.c \ r200_fragshader.c \ r200_vertprog.c \ + r200_blit.c \ radeon_screen.c \ $(EGL_SOURCES) \ $(RADEON_COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c new file mode 100644 index 0000000000..f899f7efdc --- /dev/null +++ b/src/mesa/drivers/dri/r200/r200_blit.c @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r200_context.h"
+#include "r200_blit.h"
+
+static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, /* rscrn is not read in this body */
+                                  int reg, int count)
+{
+    if (count) /* non-zero count: packet0 header built from reg and count - 1 */
+        return CP_PACKET0(reg, count - 1);
+    return CP_PACKET2; /* count == 0: fall back to CP_PACKET2 */
+}
+
+/* common formats supported as both textures and render targets; returns 1 if blittable, 0 otherwise */
+static unsigned is_blit_supported(gl_format mesa_format)
+{
+    /* XXX others? BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_A8:
+            break;
+    default:
+            return 0;
+    }
+
+    /* Additionally reject any format that reports depth bits. */
+    if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+        return 0;
+
+    return 1;
+}
+
+static inline void emit_vtx_state(struct r200_context *r200)
+{
+    BATCH_LOCALS(&r200->radeon);
+
+    BEGIN_BATCH(14); /* 7 OUT_BATCH_REGVAL calls follow (one taken from the if/else) */
+    if (r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+        OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, 0);
+    } else {
+        OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS); /* chip without the TCL flag */
+    }
+    OUT_BATCH_REGVAL(R200_SE_VAP_CNTL, (R200_VAP_FORCE_W_TO_ONE |
+                                        (9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT)));
+    OUT_BATCH_REGVAL(R200_SE_VTX_STATE_CNTL, 0);
+    OUT_BATCH_REGVAL(R200_SE_VTE_CNTL, 0);
+    OUT_BATCH_REGVAL(R200_SE_VTX_FMT_0, R200_VTX_XY); /* vertex format: XY position ... */
+    OUT_BATCH_REGVAL(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT)); /* ... plus a tex0 component count of 2 */
+    OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
+                                      RADEON_BFACE_SOLID |
+                                      RADEON_FFACE_SOLID |
+                                      RADEON_VTX_PIX_CENTER_OGL |
+                                      RADEON_ROUND_MODE_ROUND |
+                                      RADEON_ROUND_PREC_4TH_PIX));
+    END_BATCH();
+}
+
+static void inline emit_tx_setup(struct r200_context *r200,
+                                 gl_format mesa_format,
+                                 struct radeon_bo *bo,
+                                 intptr_t offset,
+                                 unsigned width,
+                                 unsigned height,
+                                 unsigned pitch)
+{
+    uint32_t txformat = R200_TXFORMAT_NON_POWER2;
+    BATCH_LOCALS(&r200->radeon);
+
+    assert(width <= 2047);
+    assert(height <= 2047);
+    assert(offset % 32 == 0); /* NOTE(review): offset is only checked here; the TXOFFSET relocation below is emitted with 0 - confirm intended */
+
+    /* XXX others? BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+            txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
+            break;
+    case MESA_FORMAT_XRGB8888:
+            txformat |= R200_TXFORMAT_ARGB8888; /* same hw format as ARGB8888, without ALPHA_IN_MAP */
+            break;
+    case MESA_FORMAT_RGB565:
+            txformat |= R200_TXFORMAT_RGB565;
+            break;
+    case MESA_FORMAT_ARGB4444:
+            txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
+            break;
+    case MESA_FORMAT_ARGB1555:
+            txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
+            break;
+    case MESA_FORMAT_A8:
+            txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP; /* A8 is sampled through the I8 format */
+            break;
+    default:
+            break; /* unknown format: only R200_TXFORMAT_NON_POWER2 stays set */
+    }
+
+    BEGIN_BATCH(28);
+    OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+    OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0);
+    OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0);
+    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
+                                          R200_TXC_ARG_B_ZERO |
+                                          R200_TXC_ARG_C_R0_COLOR |
+                                          R200_TXC_OP_MADD)); /* A and B are zero, so MADD passes argument C (R0 colour) through */
+    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
+                                          R200_TXA_ARG_B_ZERO |
+                                          R200_TXA_ARG_C_R0_ALPHA |
+                                          R200_TXA_OP_MADD)); /* same pass-through setup for alpha */
+    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+    OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST |
+                                          R200_CLAMP_T_CLAMP_LAST |
+                                          R200_MAG_FILTER_NEAREST |
+                                          R200_MIN_FILTER_NEAREST)); /* nearest filtering for both mag and min */
+    OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat);
+    OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0);
+    OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) |
+                                        ((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
+    OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32); /* pitch argument times bytes per texel, minus 32 */
+
+    OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1);
+    OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+    END_BATCH();
+}
+
+static inline void emit_cb_setup(struct r200_context *r200,
+                                 struct radeon_bo *bo,
+                                 intptr_t offset,
+                                 gl_format mesa_format,
+                                 unsigned pitch,
+                                 unsigned width,
+                                 unsigned height)
+{
+    uint32_t dst_pitch = pitch;
+    uint32_t dst_format = 0;
+    BATCH_LOCALS(&r200->radeon);
+
+    /* XXX others? BE/LE? NOTE(review): the offset parameter is never read in this function - confirm intended */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+            dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+            break;
+    case MESA_FORMAT_RGB565:
+            dst_format = RADEON_COLOR_FORMAT_RGB565;
+            break;
+    case MESA_FORMAT_ARGB4444:
+            dst_format = RADEON_COLOR_FORMAT_ARGB4444;
+            break;
+    case MESA_FORMAT_ARGB1555:
+            dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+            break;
+    case MESA_FORMAT_A8:
+            dst_format = RADEON_COLOR_FORMAT_RGB8; /* A8 targets are written through the RGB8 colour format */
+            break;
+    default:
+            break; /* unknown format: dst_format stays 0 */
+    }
+
+    BEGIN_BATCH_NO_AUTOSTATE(22);
+    OUT_BATCH_REGVAL(R200_RE_AUX_SCISSOR_CNTL, 0);
+    OUT_BATCH_REGVAL(R200_RE_CNTL, 0);
+    OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
+    OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
+                                              (height << RADEON_RE_HEIGHT_SHIFT)));
+    OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
+    OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); /* ONE/ZERO factors: source replaces destination */
+    OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
+
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
+    OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
+    OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+
+    END_BATCH();
+}
+
+static GLboolean validate_buffers(struct r200_context *r200,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    int ret;
+    radeon_cs_space_add_persistent_bo(r200->radeon.cmdbuf.cs,
+                                      src_bo, RADEON_GEM_DOMAIN_VRAM, 0); /* presumably (read domain, write domain) - TODO confirm */
+
+    radeon_cs_space_add_persistent_bo(r200->radeon.cmdbuf.cs,
+                                      dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+    ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs,
+                                        first_elem(&r200->radeon.dma.reserved)->bo,
+                                        RADEON_GEM_DOMAIN_GTT, 0);
+    if (ret) /* non-zero result from the space check is reported as failure */
+        return GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/**
+ * Calculate texcoords for given image region.
+ * Output values are [minx, maxx, miny, maxy], each divided by the image size; with flip_y set both y bounds become 1 - y.
+ */
+static inline void calc_tex_coords(float img_width, float img_height,
+                                   float x, float y,
+                                   float reg_width, float reg_height,
+                                   unsigned flip_y, float *buf)
+{
+    buf[0] = x / img_width;
+    buf[1] = buf[0] + reg_width / img_width;
+    buf[2] = y / img_height;
+    buf[3] = buf[2] + reg_height / img_height;
+    if (flip_y) /* mirror only the y bounds; buf[0] and buf[1] (x) stay untouched */
+    {
+        buf[2] = 1.0 - buf[2];
+        buf[3] = 1.0 - buf[3];
+    }
+}
+
+static inline void emit_draw_packet(struct r200_context *r200,
+                                    unsigned src_width, unsigned src_height,
+                                    unsigned src_x_offset, unsigned src_y_offset,
+                                    unsigned dst_x_offset, unsigned dst_y_offset,
+                                    unsigned reg_width, unsigned reg_height,
+                                    unsigned flip_y)
+{
+    float texcoords[4]; /* [minx, maxx, miny, maxy] as filled in by calc_tex_coords */
+    float verts[12]; /* 3 vertices, each stored as (x, y, s, t) */
+    BATCH_LOCALS(&r200->radeon);
+
+    calc_tex_coords(src_width, src_height,
+                    src_x_offset, src_y_offset,
+                    reg_width, reg_height,
+                    flip_y, texcoords);
+
+    verts[0] = dst_x_offset; /* vertex 0: (x, y + h) with texcoord (minx, maxy) */
+    verts[1] = dst_y_offset + reg_height;
+    verts[2] = texcoords[0];
+    verts[3] = texcoords[3];
+
+    verts[4] = dst_x_offset + reg_width; /* vertex 1: (x + w, y + h) with texcoord (maxx, maxy) */
+    verts[5] = dst_y_offset + reg_height;
+    verts[6] = texcoords[1];
+    verts[7] = texcoords[3];
+
+    verts[8] = dst_x_offset + reg_width; /* vertex 2: (x + w, y) with texcoord (maxx, miny) */
+    verts[9] = dst_y_offset;
+    verts[10] = texcoords[1];
+    verts[11] = texcoords[2];
+
+    BEGIN_BATCH(14); /* 2 OUT_BATCH words below plus the 12-entry vertex table */
+    OUT_BATCH(R200_CP_CMD_3D_DRAW_IMMD_2 | (12 << 16));
+    OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+              RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+              (3 << 16));
+    OUT_BATCH_TABLE(verts, 12);
+    END_BATCH();
+}
+
+/**
+ * Copy a region of [@a reg_width x @a reg_height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] ctx GL context; the r200 context is obtained from it via R200_CONTEXT()
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] reg_width region width (clamped to fit both images)
+ * @param[in] reg_height region height (clamped to fit both images)
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ */
+unsigned r200_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y)
+{
+    struct r200_context *r200 = R200_CONTEXT(ctx);
+
+    if (!is_blit_supported(dst_mesaformat)) /* only the destination format is checked here */
+        return GL_FALSE;
+
+    /* Make sure that colorbuffer has even width - hw limitation */
+    if (dst_pitch % 2 > 0)
+        ++dst_pitch;
+
+    /* Rendering to small buffer doesn't work.
+     * Looks like a hw limitation.
+     */
+    if (dst_pitch < 32)
+        return GL_FALSE;
+
+    /* Need to clamp the region size to make sure
+     * we don't read outside of the source buffer
+     * or write outside of the destination buffer.
+     */
+    if (reg_width + src_x_offset > src_width) /* NOTE(review): operands are unsigned; an offset larger than the size would wrap - confirm callers */
+        reg_width = src_width - src_x_offset;
+    if (reg_height + src_y_offset > src_height)
+        reg_height = src_height - src_y_offset;
+    if (reg_width + dst_x_offset > dst_width)
+        reg_width = dst_width - dst_x_offset;
+    if (reg_height + dst_y_offset > dst_height)
+        reg_height = dst_height - dst_y_offset;
+
+    if (src_bo == dst_bo) { /* blits within a single buffer object are refused */
+        return GL_FALSE;
+    }
+
+    if (0) { /* debug dump, compiled but never executed */
+        fprintf(stderr, "src: size [%d x %d], pitch %d, "
+                "offset [%d x %d], format %s, bo %p\n",
+                src_width, src_height, src_pitch,
+                src_x_offset, src_y_offset,
+                _mesa_get_format_name(src_mesaformat),
+                src_bo);
+        fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+                dst_pitch, dst_x_offset, dst_y_offset,
+                _mesa_get_format_name(dst_mesaformat), dst_bo);
+        fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(r200->radeon.glCtx);
+
+    rcommonEnsureCmdBufSpace(&r200->radeon, 78, __FUNCTION__); /* 78 = 14 + 28 + 22 + 14, the four batch sizes noted below */
+
+    if (!validate_buffers(r200, src_bo, dst_bo))
+        return GL_FALSE;
+
+    /* 14: matches BEGIN_BATCH(14) in emit_vtx_state */
+    emit_vtx_state(r200);
+    /* 28: matches BEGIN_BATCH(28) in emit_tx_setup */
+    emit_tx_setup(r200, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+    /* 22: matches BEGIN_BATCH_NO_AUTOSTATE(22) in emit_cb_setup */
+    emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+    /* 14: matches BEGIN_BATCH(14) in emit_draw_packet */
+    emit_draw_packet(r200, src_width, src_height,
+                     src_x_offset, src_y_offset,
+                     dst_x_offset, dst_y_offset,
+                     reg_width, reg_height,
+                     flip_y);
+
+    radeonFlush(ctx); /* second flush, issued after the draw packet has been emitted */
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_blit.h b/src/mesa/drivers/dri/r200/r200_blit.h
new file mode 100644
index 0000000000..38487266ae
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_blit.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora
<m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef R200_BLIT_H +#define R200_BLIT_H + +void r200_blit_init(struct r200_context *r200); + +unsigned r200_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned width, + unsigned height, + unsigned flip_y); + +#endif // R200_BLIT_H diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index f34e319222..3d6d0f5ec0 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -61,6 +61,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_maos.h" #include "r200_vertprog.h" #include "radeon_queryobj.h" +#include "r200_blit.h" #include "radeon_span.h" @@ -268,6 +269,7 @@ static void r200_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = r200Fallback; radeon->vtbl.update_scissor = r200_vtbl_update_scissor; radeon->vtbl.emit_query_finish = r200_emit_query_finish; + radeon->vtbl.blit = r200_blit; } @@ -294,6 +296,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, if ( !rmesa ) return GL_FALSE; + rmesa->radeon.radeonScreen = screen; r200_init_vtbl(&rmesa->radeon); /* init exp fog table data */ r200InitStaticFogData(); @@ -326,10 +329,14 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); r200InitStateFuncs(&functions); - r200InitTextureFuncs(&functions); + r200InitTextureFuncs(&rmesa->radeon, &functions); r200InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); + if (rmesa->radeon.radeonScreen->kernel_mm) { + r200_init_texcopy_functions(&functions); + } + if 
(!radeonInitContext(&rmesa->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index 17e4d8962e..a9dce310ae 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -645,6 +645,8 @@ extern GLboolean r200MakeCurrent( __DRIcontext *driContextPriv, __DRIdrawable *driReadPriv ); extern GLboolean r200UnbindContext( __DRIcontext *driContextPriv ); +extern void r200_init_texcopy_functions(struct dd_function_table *table); + /* ================================================================ * Debugging: */ diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 5b87ba6ccd..0916df6476 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -477,7 +477,7 @@ static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, -void r200InitTextureFuncs( struct dd_function_table *functions ) +void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. 
@@ -511,6 +511,11 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; functions->NewTextureImage = radeonNewTextureImage; diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index e122de6e5e..1a1e7038df 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -48,7 +48,7 @@ extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t ); -extern void r200InitTextureFuncs( struct dd_function_table *functions ); +extern void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); extern void r200UpdateFragmentShader( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r200/radeon_tex_copy.c b/src/mesa/drivers/dri/r200/radeon_tex_copy.c new file mode 120000 index 0000000000..dfa5ba34e6 --- /dev/null +++ b/src/mesa/drivers/dri/r200/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index be005bd164..0d0fbcc408 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -39,7 +39,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_span.c \ radeon_queryobj.c \ - radeon_texture.c + radeon_texture.c \ + radeon_tex_copy.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -50,7 +51,6 @@ DRIVER_SOURCES = \ r300_state.c \ r300_render.c \ r300_tex.c \ - r300_texcopy.c \ r300_texstate.c \ r300_vertprog.c \ r300_fragprog_common.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 731adc1af2..f27f858652 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -23,6 +23,8 @@ #ifndef RADEON_COMPILER_H #define RADEON_COMPILER_H +#include "../../../../main/compiler.h" + #include "memory_pool.h" #include "radeon_code.h" #include "radeon_program.h" diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 2eec27e900..e24c7955d4 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -150,8 +150,8 @@ static void r300_emit_tx_setup(struct r300_context *r300, (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) | (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) | R300_TX_MIN_FILTER_MIP_NONE | - R300_TX_MIN_FILTER_LINEAR | - R300_TX_MAG_FILTER_LINEAR | + R300_TX_MIN_FILTER_NEAREST | + R300_TX_MAG_FILTER_NEAREST | (0 << 28)); OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0); OUT_BATCH_REGVAL(R300_TX_SIZE_0, @@ -403,9 +403,8 @@ static void calc_tex_coords(float img_width, float img_height, buf[3] = buf[2] + reg_height / img_height; if (flip_y) { - float tmp = buf[2]; - buf[2] = 1.0 - buf[3]; - buf[3] = 1.0 - tmp; + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; } } @@ -424,13 +423,13 @@ static void 
emit_draw_packet(struct r300_context *r300, flip_y, texcoords); float verts[] = { dst_x_offset, dst_y_offset, - texcoords[0], texcoords[3], - dst_x_offset, dst_y_offset + reg_height, texcoords[0], texcoords[2], + dst_x_offset, dst_y_offset + reg_height, + texcoords[0], texcoords[3], dst_x_offset + reg_width, dst_y_offset + reg_height, - texcoords[1], texcoords[2], + texcoords[1], texcoords[3], dst_x_offset + reg_width, dst_y_offset, - texcoords[1], texcoords[3] }; + texcoords[1], texcoords[2] }; BATCH_LOCALS(&r300->radeon); @@ -495,6 +494,27 @@ static void emit_cb_setup(struct r300_context *r300, END_BATCH(); } +static unsigned is_blit_supported(gl_format dst_format) +{ + switch (dst_format) { + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_XRGB8888: + break; + default: + return 0; + } + + if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + /** * Copy a region of [@a width x @a height] pixels from source buffer * to destination buffer. 
@@ -519,29 +539,31 @@ static void emit_cb_setup(struct r300_context *r300, * @param[in] height region height * @param[in] flip_y set if y coords of the source image need to be flipped */ -GLboolean r300_blit(struct r300_context *r300, - struct radeon_bo *src_bo, - intptr_t src_offset, - gl_format src_mesaformat, - unsigned src_pitch, - unsigned src_width, - unsigned src_height, - unsigned src_x_offset, - unsigned src_y_offset, - struct radeon_bo *dst_bo, - intptr_t dst_offset, - gl_format dst_mesaformat, - unsigned dst_pitch, - unsigned dst_width, - unsigned dst_height, - unsigned dst_x_offset, - unsigned dst_y_offset, - unsigned reg_width, - unsigned reg_height, - unsigned flip_y) +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) { - if (_mesa_get_format_bits(src_mesaformat, GL_DEPTH_BITS) > 0) - return GL_FALSE; + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (!is_blit_supported(dst_mesaformat)) + return 0; /* Make sure that colorbuffer has even width - hw limitation */ if (dst_pitch % 2 > 0) @@ -551,7 +573,7 @@ GLboolean r300_blit(struct r300_context *r300, * Looks like a hw limitation. 
*/ if (dst_pitch < 32) - return GL_FALSE; + return 0; /* Need to clamp the region size to make sure * we don't read outside of the source buffer @@ -567,6 +589,10 @@ GLboolean r300_blit(struct r300_context *r300, reg_height = dst_height - dst_y_offset; if (src_bo == dst_bo) { + return 0; + } + + if (src_offset % 32 || dst_offset % 32) { return GL_FALSE; } @@ -587,7 +613,7 @@ GLboolean r300_blit(struct r300_context *r300, radeonFlush(r300->radeon.glCtx); if (!validate_buffers(r300, src_bo, dst_bo)) - return GL_FALSE; + return 0; rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__); @@ -618,5 +644,5 @@ GLboolean r300_blit(struct r300_context *r300, radeonFlush(r300->radeon.glCtx); - return GL_TRUE; -}
\ No newline at end of file + return 1; +} diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h index dc21e88098..735acaddd7 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.h +++ b/src/mesa/drivers/dri/r300/r300_blit.h @@ -30,25 +30,25 @@ void r300_blit_init(struct r300_context *r300); -GLboolean r300_blit(struct r300_context *r300, - struct radeon_bo *src_bo, - intptr_t src_offset, - gl_format src_mesaformat, - unsigned src_pitch, - unsigned src_width, - unsigned src_height, - unsigned src_x_offset, - unsigned src_y_offset, - struct radeon_bo *dst_bo, - intptr_t dst_offset, - gl_format dst_mesaformat, - unsigned dst_pitch, - unsigned dst_width, - unsigned dst_height, - unsigned dst_x_offset, - unsigned dst_y_offset, - unsigned width, - unsigned height, - unsigned flip_y); +unsigned r300_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y); #endif // R300_BLIT_H
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 1f6ccf6ddc..bb0e6db313 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -93,8 +93,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/remap_helper.h" -void r300_init_texcopy_functions(struct dd_function_table *table); - static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, @@ -326,6 +324,8 @@ static void r300_init_vtbl(radeonContextPtr radeon) radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z; } else radeon->vtbl.emit_query_finish = r300_emit_query_finish; + + radeon->vtbl.blit = r300_blit; } static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -488,15 +488,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); r300InitStateFuncs(&functions); - r300InitTextureFuncs(&functions); + r300InitTextureFuncs(&r300->radeon, &functions); r300InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); radeonInitBufferObjectFuncs(&functions); - if (r300->radeon.radeonScreen->kernel_mm) { - r300_init_texcopy_functions(&functions); - } - if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 546cd8ddde..78ab43a99f 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -554,8 +554,6 @@ extern void r300InitShaderFunctions(r300ContextPtr r300); extern void r300InitDraw(GLcontext *ctx); -extern void r300_init_texcopy_functions(struct dd_function_table *table); - #define r300PackFloat32 radeonPackFloat32 #define r300PackFloat24 radeonPackFloat24 diff --git 
a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 963f648cb1..eb5d2d5004 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -312,7 +312,7 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, return &t->base; } -void r300InitTextureFuncs(struct dd_function_table *functions) +void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. @@ -340,6 +340,11 @@ void r300InitTextureFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index 6ede0fe25c..9694e703b8 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -49,7 +49,7 @@ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, extern GLboolean r300ValidateBuffers(GLcontext * ctx); -extern void r300InitTextureFuncs(struct dd_function_table *functions); +extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); int32_t r300TranslateTexFormat(gl_format mesaFormat); diff --git a/src/mesa/drivers/dri/r300/radeon_tex_copy.c b/src/mesa/drivers/dri/r300/radeon_tex_copy.c new file mode 120000 index 0000000000..dfa5ba34e6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 26f47b7268..e55d0babd8 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -39,7 +39,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_span.c \ radeon_texture.c \ - radeon_queryobj.c + radeon_queryobj.c \ + radeon_tex_copy.c DRIVER_SOURCES = \ radeon_screen.c \ @@ -59,6 +60,7 @@ DRIVER_SOURCES = \ r700_render.c \ r600_tex.c \ r600_texstate.c \ + r600_blit.c \ r700_debug.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c new file mode 100644 index 0000000000..d7cd59ade6 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -0,0 +1,1660 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "r600_blit.h"
+#include "r600_blit_shaders.h"
+#include "r600_cmdbuf.h"
+
+/**
+ * Tell whether a Mesa format can go through the blit path.
+ *
+ * The switch lists the common formats supported as both textures and
+ * render targets; anything else yields 0.  Formats that pass the switch
+ * are still rejected when _mesa_get_format_bits() reports a non-zero
+ * GL_DEPTH_BITS count, because blitting to depth is not known to work yet.
+ *
+ * \param mesa_format  format to test
+ * \return 1 if the format may be blitted, 0 otherwise
+ */
+static unsigned is_blit_supported(gl_format mesa_format)
+{
+    switch (mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_SIGNED_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_RGB565_REV:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB4444_REV:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_ARGB1555_REV:
+    case MESA_FORMAT_AL88:
+    case MESA_FORMAT_AL88_REV:
+    case MESA_FORMAT_RGB332:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_RGBA_FLOAT32:
+    case MESA_FORMAT_RGBA_FLOAT16:
+    case MESA_FORMAT_ALPHA_FLOAT32:
+    case MESA_FORMAT_ALPHA_FLOAT16:
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+    case MESA_FORMAT_Z24_S8:
+    case MESA_FORMAT_Z16:
+    case MESA_FORMAT_Z32:
+    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SLA8:
+    case MESA_FORMAT_SL8:
+        break;
+    default:
+        return 0;
+    }
+
+    /* ??? */
+    /* not sure blit to depth works or not yet */
+    if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
+        return 0;
+
+    return 1;
+}
+
+/**
+ * Emit the CB_COLOR0_* register state that makes \p bo the render target
+ * of the blit.
+ *
+ * \param context        driver context whose command stream is written
+ * \param bo             buffer object used for every relocation emitted here
+ * \param mesa_format    destination format; selects FORMAT, COMP_SWAP,
+ *                       NUMBER_TYPE and related CB_COLOR0_INFO bits
+ * \param nPitchInPixel  destination pitch; feeds PITCH_TILE_MAX and
+ *                       SLICE_TILE_MAX
+ * \param w              not used by this function
+ * \param h              destination height; feeds SLICE_TILE_MAX
+ * \param dst_offset     destination offset; written to the base registers
+ *                       divided by 256
+ *
+ * An unhandled format is reported on stderr and trips an assertion; when
+ * assertions are compiled out the function returns without emitting
+ * anything.
+ */
+static inline void
+set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_format,
+                  int nPitchInPixel, int w, int h, intptr_t dst_offset)
+{
+    uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view = 0;
+    int id = 0;
+    uint32_t comp_swap, format;
+    BATCH_LOCALS(&context->radeon);
+
+    cb_color0_base = dst_offset / 256;
+
+    SETfield(cb_color0_size, (nPitchInPixel / 8) - 1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+
+    SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL,
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+
+    SETbit(cb_color0_info, BLEND_BYPASS_bit);
+
+    switch(mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SIGNED_RGBA8888:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGBA8888_REV:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGB565:
+        format = COLOR_5_6_5;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGB565_REV:
+        format = COLOR_5_6_5;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB4444:
+        format = COLOR_4_4_4_4;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB4444_REV:
+        format = COLOR_4_4_4_4;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB1555:
+        format = COLOR_1_5_5_5;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ARGB1555_REV:
+        format = COLOR_1_5_5_5;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_AL88:
+        format = COLOR_8_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_AL88_REV:
+        format = COLOR_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGB332:
+        format = COLOR_3_3_2;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_A8:
+        format = COLOR_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+        format = COLOR_8;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_L8:
+        format = COLOR_8;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGBA_FLOAT32:
+        format = COLOR_32_32_32_32_FLOAT;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_RGBA_FLOAT16:
+        format = COLOR_16_16_16_16_FLOAT;
+        comp_swap = SWAP_STD_REV;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ALPHA_FLOAT32:
+        format = COLOR_32_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_ALPHA_FLOAT16:
+        format = COLOR_16_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+        format = COLOR_32_FLOAT;
+        comp_swap = SWAP_ALT;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+        format = COLOR_16_FLOAT;
+        comp_swap = SWAP_ALT;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+        format = COLOR_32_32_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+        format = COLOR_16_16_FLOAT;
+        comp_swap = SWAP_ALT_REV;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+        format = COLOR_32_FLOAT;
+        comp_swap = SWAP_STD;
+        SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+        format = COLOR_16_FLOAT;
+        comp_swap = SWAP_STD;
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+        format = COLOR_8_24;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_Z24_S8:
+        format = COLOR_24_8;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_Z16:
+        format = COLOR_16;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_Z32:
+        format = COLOR_32;
+        comp_swap = SWAP_STD;
+        SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+                 CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+        CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SRGBA8:
+        format = COLOR_8_8_8_8;
+        comp_swap = SWAP_STD_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SLA8:
+        format = COLOR_8_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    case MESA_FORMAT_SL8:
+        format = COLOR_8;
+        comp_swap = SWAP_ALT_REV;
+        SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+        SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+        break;
+    default:
+        fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+        /* A bare string literal is a non-null pointer and would never
+         * trip the assertion; negate it so the check really fires. */
+        assert(!"Invalid format for US output\n");
+        return;
+    }
+
+    SETfield(cb_color0_info, format, CB_COLOR0_INFO__FORMAT_shift,
+             CB_COLOR0_INFO__FORMAT_mask);
+    SETfield(cb_color0_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask);
+
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+
+    /* Only families strictly between R600 and RV770 get the extra
+     * SURFACE_BASE_UPDATE packet. */
+    if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+        (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+        BEGIN_BATCH_NO_AUTOSTATE(2);
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+        R600_OUT_BATCH((2 << id));
+        END_BATCH();
+    }
+
+    /* Point the CMASK and TILE buffers at the colour buffer offset.
+     * Neither is used by the blit, so this should not cause any issue,
+     * and it leaves the command stream valid. */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(12);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
+    END_BATCH();
+
+    COMMIT_BATCH();
+
+}
+
+/**
+ * Create and fill the buffer object holding the blit shaders, once per
+ * context.
+ *
+ * Does nothing when context->blit_bo_loaded is already 1.  Otherwise a
+ * 4096-byte buffer is opened in the GTT domain, r6xx_vs is copied in
+ * starting at dword 128 (byte 512) and r6xx_ps starting at dword 256
+ * (byte 1024), and blit_bo_loaded is set to 1.
+ *
+ * NOTE(review): the results of radeon_bo_open() and radeon_bo_map() are
+ * not checked before the buffer pointer is dereferenced -- confirm what
+ * the caller expects on allocation failure before adding handling.
+ */
+static inline void load_shaders(GLcontext * ctx)
+{
+
+    radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+    context_t *context = R700_CONTEXT(ctx);
+    int i, size;
+    uint32_t *shader;
+
+    if (context->blit_bo_loaded == 1)
+        return;
+
+    size = 4096;
+    context->blit_bo = radeon_bo_open(radeonctx->radeonScreen->bom, 0,
+                                      size, 256, RADEON_GEM_DOMAIN_GTT, 0);
+    radeon_bo_map(context->blit_bo, 1);
+    shader = context->blit_bo->ptr;
+
+    for(i=0; i<sizeof(r6xx_vs)/4; i++) {
+        shader[128+i] = r6xx_vs[i];
+    }
+    for(i=0; i<sizeof(r6xx_ps)/4; i++) {
+        shader[256+i] = r6xx_ps[i];
+    }
+
+    radeon_bo_unmap(context->blit_bo);
+    context->blit_bo_loaded = 1;
+
+}
+
+/**
+ * Emit the shader program state for the blit.
+ *
+ * All three program start registers are relocated against
+ * context->blit_bo.  The start values are byte offsets shifted right by
+ * eight: 512 for both FS and VS and 1024 for PS, i.e. the positions
+ * load_shaders() writes r6xx_vs and r6xx_ps to.  The SPI registers that
+ * follow are written with the fixed values shown below.
+ */
+static inline void
+set_shaders(context_t *context)
+{
+    struct radeon_bo * pbo = context->blit_bo;
+    BATCH_LOCALS(&context->radeon);
+
+    uint32_t sq_pgm_start_fs = (512 >> 8);
+    uint32_t sq_pgm_resources_fs = 0;
+    uint32_t sq_pgm_cf_offset_fs = 0;
+
+    uint32_t sq_pgm_start_vs = (512 >> 8);
+    uint32_t sq_pgm_resources_vs = (1 << NUM_GPRS_shift);
+    uint32_t sq_pgm_cf_offset_vs = 0;
+
+    uint32_t sq_pgm_start_ps = (1024 >> 8);
+    uint32_t sq_pgm_resources_ps = (1 << NUM_GPRS_shift);
+    uint32_t sq_pgm_cf_offset_ps = 0;
+    uint32_t sq_pgm_exports_ps = (1 << 1);
+
+    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+    /* FS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
+    R600_OUT_BATCH(sq_pgm_start_fs);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_fs,
+                         pbo,
+                         sq_pgm_start_fs,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, sq_pgm_resources_fs);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, sq_pgm_cf_offset_fs);
+    END_BATCH();
+
+    /* VS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+    R600_OUT_BATCH(sq_pgm_start_vs);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_vs,
+                         pbo,
+                         sq_pgm_start_vs,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, sq_pgm_resources_vs);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, sq_pgm_cf_offset_vs);
+    END_BATCH();
+
+    /* PS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+    R600_OUT_BATCH(sq_pgm_start_ps);
+    R600_OUT_BATCH_RELOC(sq_pgm_start_ps,
+                         pbo,
+                         sq_pgm_start_ps,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(9);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, sq_pgm_resources_ps);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, sq_pgm_exports_ps);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, sq_pgm_cf_offset_ps);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(18);
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); //EXPORT_COUNT is - 1
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0);
+    R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit);
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0);
+    R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0);
+    END_BATCH();
+
+    COMMIT_BATCH();
+
+}
+
+/**
+ * Emit the vertex fetch state for the blit.
+ *
+ * Zeroes SQ_VTX_BASE_VTX_LOC and SQ_VTX_START_INST_LOC, syncs
+ * context->blit_bo, then emits one SET_RESOURCE packet for the VS fetch
+ * resource (size field 48 - 1, stride 16) relocated against the same
+ * buffer.
+ */
+static inline void
+set_vtx_resource(context_t *context)
+{
+    struct radeon_bo *bo = context->blit_bo;
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+    /* The five families listed here sync with the TC action bit; every
+     * other family uses the VC action bit. */
+    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+        r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+    else
+        r700SyncSurf(context, bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH(SQ_FETCH_RESOURCE_VS_OFFSET * FETCH_RESOURCE_STRIDE);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(48 - 1);
+    R600_OUT_BATCH(16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+    R600_OUT_BATCH(1 << MEM_REQUEST_SIZE_shift);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
+    R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0,
+                         bo,
+                         SQ_VTX_CONSTANT_WORD0_0,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+}
+
+static inline void
+set_tex_resource(context_t * context,
+                 gl_format
mesa_format, struct radeon_bo *bo, int w, int h, + int TexelPitch, intptr_t src_offset) +{ + uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6; + + sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0; + BATCH_LOCALS(&context->radeon); + + SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask); + SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + + switch (mesa_format) { + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) { + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) { + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } + break; + case MESA_FORMAT_ARGB8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_XRGB8888_REV: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565: + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + 
SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB565_REV: + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444: + SETfield(sq_tex_resource1, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB4444_REV: + SETfield(sq_tex_resource1, FMT_4_4_4_4, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555: + SETfield(sq_tex_resource1, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ARGB1555_REV: + SETfield(sq_tex_resource1, FMT_1_5_5_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: /* TODO : Check this. 
*/ + SETfield(sq_tex_resource1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGB332: + SETfield(sq_tex_resource1, FMT_3_3_2, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + 
case MESA_FORMAT_L8: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_I8: /* X, X, X, X */ + case MESA_FORMAT_CI8: + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT32: + SETfield(sq_tex_resource1, FMT_32_32_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_RGBA_FLOAT16: + SETfield(sq_tex_resource1, FMT_16_16_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + SETfield(sq_tex_resource1, FMT_32_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + SETfield(sq_tex_resource1, FMT_16_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */ + SETfield(sq_tex_resource1, FMT_32_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */ + SETfield(sq_tex_resource1, FMT_16_FLOAT, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z16: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_16, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_X8_Z24: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, 
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_S8_Z24: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z24_S8: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_24_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_0, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_Z32: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + 
SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_32, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_S8: + SETbit(sq_tex_resource0, TILE_TYPE_bit); + SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + break; + case MESA_FORMAT_SRGBA8: + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + 
SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SLA8: + SETfield(sq_tex_resource1, FMT_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + case MESA_FORMAT_SL8: /* X, X, X, ONE */ + SETfield(sq_tex_resource1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); + break; + default: + fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format)); + assert("Invalid format for US output\n"); + return; + }; + + 
SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask);
+    SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask);
+    SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+    sq_tex_resource2 = src_offset / 256;
+
+    SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_shift,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+    r700SyncSurf(context, bo,
+                 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+                 0, TC_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH(0 * 7);
+
+    R600_OUT_BATCH(sq_tex_resource0);
+    R600_OUT_BATCH(sq_tex_resource1);
+    R600_OUT_BATCH(sq_tex_resource2);
+    R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3
+    R600_OUT_BATCH(sq_tex_resource4);
+    R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5
+    R600_OUT_BATCH(sq_tex_resource6);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    R600_OUT_BATCH_RELOC(0,
+                         bo,
+                         0,
+                         RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/*
+ * Emit a SET_SAMPLER packet for sampler slot 0 (offset i * 3 with i == 0).
+ *
+ * Sampler words 0 and 1 are written as zero; word 2 carries only the
+ * SQ_TEX_SAMPLER_WORD2_0__TYPE bit.  The batch is 5 entries long.
+ *
+ * NOTE(review): unlike the other emitters in this file this function ends
+ * with END_BATCH() only and never calls COMMIT_BATCH() -- confirm that this
+ * is intentional.
+ */
+static inline void
+set_tex_sampler(context_t * context)
+{
+    uint32_t sq_tex_sampler_word0 = 0, sq_tex_sampler_word1 = 0, sq_tex_sampler_word2 = 0;
+    int i = 0; /* sampler slot index; always 0 here */
+
+    SETbit(sq_tex_sampler_word2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(5);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+    R600_OUT_BATCH(i * 3);
+    R600_OUT_BATCH(sq_tex_sampler_word0);
+    R600_OUT_BATCH(sq_tex_sampler_word1);
+    R600_OUT_BATCH(sq_tex_sampler_word2);
+    END_BATCH();
+
+}
+
+/*
+ * Program the screen, window, generic and viewport-0 scissor registers with
+ * the same rectangle.
+ *
+ * Each register value packs the x coordinate into the low bits and the y
+ * coordinate shifted left by 16.  (x1, y1) is written to the *_TL registers
+ * and (x2, y2) to the value that follows each of them (presumably the
+ * matching *_BR register -- confirm against the register layout).
+ * WINDOW_OFFSET_DISABLE_bit is OR-ed into the window, generic and viewport
+ * TL values; PA_SC_WINDOW_OFFSET itself is written as 0.
+ *
+ * The batch is 17 entries long and is committed before returning.
+ */
+static inline void
+set_scissors(context_t *context, int x1, int y1, int x2, int y2)
+{
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(17);
+    R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16));
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+    R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3);
+    R600_OUT_BATCH(0); //PA_SC_WINDOW_OFFSET
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); //PA_SC_WINDOW_SCISSOR_TL
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+    R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit));
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+
+    /* XXX 16 of these PA_SC_VPORT_SCISSOR_0_TL_num ... */
+    R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2 );
+    R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit));
+    R600_OUT_BATCH((x2 << 0) | (y2 << 16));
+    END_BATCH();
+
+    COMMIT_BATCH();
+
+}
+
+/*
+ * Fill context->blit_bo with the three vertices drawn by draw_auto().
+ *
+ * Every vertex is four floats: destination x, destination y, source x,
+ * source y.  The vertices are, in order:
+ *   0: (dst_x,     dst_y)      <-> (src_x,     src_y)
+ *   1: (dst_x,     dst_y + h)  <-> (src_x,     src_y + h)
+ *   2: (dst_x + w, dst_y + h)  <-> (src_x + w, src_y + h)
+ * When flip_y is non-zero each source y is replaced by src_h minus that
+ * value, which mirrors the source rectangle vertically.
+ *
+ * NOTE(review): the return value of radeon_bo_map() is ignored, so a failed
+ * mapping would be written through whatever ->ptr holds -- confirm that the
+ * map cannot fail here.
+ */
+static inline void
+set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y,
+            int w, int h, int src_h, unsigned flip_y)
+{
+    float *vb;
+    radeon_bo_map(context->blit_bo, 1); /* second argument presumably requests write access -- TODO confirm */
+    vb = context->blit_bo->ptr;
+
+    vb[0] = (float)(dst_x);
+    vb[1] = (float)(dst_y);
+    vb[2] = (float)(src_x);
+    vb[3] = (flip_y) ? (float)(src_h - src_y) : (float)src_y;
+
+    vb[4] = (float)(dst_x);
+    vb[5] = (float)(dst_y + h);
+    vb[6] = (float)(src_x);
+    vb[7] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
+
+    vb[8] = (float)(dst_x + w);
+    vb[9] = (float)(dst_y + h);
+    vb[10] = (float)(src_x + w);
+    vb[11] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
+
+    radeon_bo_unmap(context->blit_bo);
+
+}
+
+/*
+ * Emit the draw: one DI_PT_RECTLIST primitive made of three auto-generated
+ * indices, with a single instance.
+ *
+ * Writes VGT_PRIMITIVE_TYPE, then the INDEX_TYPE (16-bit), NUM_INSTANCES (1)
+ * and DRAW_INDEX_AUTO packets.  The batch is 10 entries long and is
+ * committed before returning.
+ */
+static inline void
+draw_auto(context_t *context)
+{
+    BATCH_LOCALS(&context->radeon);
+    uint32_t vgt_primitive_type = 0, vgt_index_type = 0, vgt_draw_initiator = 0, vgt_num_indices;
+
+    SETfield(vgt_primitive_type, DI_PT_RECTLIST,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+    SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift,
+             INDEX_TYPE_mask);
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift,
+             MAJOR_MODE_mask);
+    SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift,
+             SOURCE_SELECT_mask);
+
+    vgt_num_indices = 3; /* matches the three vertices written by set_vb_data() */
+
+    BEGIN_BATCH_NO_AUTOSTATE(10);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw with auto-generated indices
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+    R600_OUT_BATCH(vgt_num_indices);
+    R600_OUT_BATCH(vgt_draw_initiator);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/*
+ * Emit the baseline register state used by the blit.
+ *
+ * Picks the GPR, thread and stack-entry budgets for the PS/VS/GS/ES stages
+ * from the screen's chip_family, packs them (together with fixed TA/DB
+ * values) into register words, and writes those plus a fixed list of
+ * CB/DB/PA/VGT registers in one 117-entry batch that is committed before
+ * returning.
+ */
+static inline void
+set_default_state(context_t *context)
+{
+    int ps_prio = 0;
+    int vs_prio = 1;
+    int gs_prio = 2;
+    int es_prio = 3;
+    int num_ps_gprs;
+    int num_vs_gprs;
+    int num_gs_gprs;
+    int num_es_gprs;
+    int num_temp_gprs;
+    int num_ps_threads;
+    int num_vs_threads;
+    int num_gs_threads;
+    int num_es_threads;
+    int num_ps_stack_entries;
+    int num_vs_stack_entries;
+    int num_gs_stack_entries;
+    int num_es_stack_entries;
+    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+    uint32_t ta_cntl_aux, db_watermarks, sq_dyn_gpr_cntl_ps_flush_req, db_debug;
+    BATCH_LOCALS(&context->radeon);
+
+    switch (context->radeon.radeonScreen->chip_family) {
+    case CHIP_FAMILY_R600: +
num_ps_gprs = 192;
+        num_vs_gprs = 56;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 136;
+        num_vs_threads = 48;
+        num_gs_threads = 4;
+        num_es_threads = 4;
+        num_ps_stack_entries = 128;
+        num_vs_stack_entries = 128;
+        num_gs_stack_entries = 0;
+        num_es_stack_entries = 0;
+        break;
+    case CHIP_FAMILY_RV630:
+    case CHIP_FAMILY_RV635:
+        num_ps_gprs = 84;
+        num_vs_gprs = 36;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 144;
+        num_vs_threads = 40;
+        num_gs_threads = 4;
+        num_es_threads = 4;
+        num_ps_stack_entries = 40;
+        num_vs_stack_entries = 40;
+        num_gs_stack_entries = 32;
+        num_es_stack_entries = 16;
+        break;
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RS880:
+    default: /* families not listed in this switch share the RV610/RV620/RS780/RS880 values */
+        num_ps_gprs = 84;
+        num_vs_gprs = 36;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 136;
+        num_vs_threads = 48;
+        num_gs_threads = 4;
+        num_es_threads = 4;
+        num_ps_stack_entries = 40;
+        num_vs_stack_entries = 40;
+        num_gs_stack_entries = 32;
+        num_es_stack_entries = 16;
+        break;
+    case CHIP_FAMILY_RV670:
+        num_ps_gprs = 144;
+        num_vs_gprs = 40;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 136;
+        num_vs_threads = 48;
+        num_gs_threads = 4;
+        num_es_threads = 4;
+        num_ps_stack_entries = 40;
+        num_vs_stack_entries = 40;
+        num_gs_stack_entries = 32;
+        num_es_stack_entries = 16;
+        break;
+    case CHIP_FAMILY_RV770:
+        num_ps_gprs = 192;
+        num_vs_gprs = 56;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 188;
+        num_vs_threads = 60;
+        num_gs_threads = 0;
+        num_es_threads = 0;
+        num_ps_stack_entries = 256;
+        num_vs_stack_entries = 256;
+        num_gs_stack_entries = 0;
+        num_es_stack_entries = 0;
+        break;
+    case CHIP_FAMILY_RV730:
+    case CHIP_FAMILY_RV740:
+        num_ps_gprs = 84;
+        num_vs_gprs = 36;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 188;
+        num_vs_threads = 60;
+        num_gs_threads = 0;
+        num_es_threads = 0;
+        num_ps_stack_entries = 128;
+        num_vs_stack_entries = 128;
+        num_gs_stack_entries = 0;
+        num_es_stack_entries = 0;
+        break;
+    case CHIP_FAMILY_RV710:
+        num_ps_gprs = 192;
+        num_vs_gprs = 56;
+        num_temp_gprs = 4;
+        num_gs_gprs = 0;
+        num_es_gprs = 0;
+        num_ps_threads = 144;
+        num_vs_threads = 48;
+        num_gs_threads = 0;
+        num_es_threads = 0;
+        num_ps_stack_entries = 128;
+        num_vs_stack_entries = 128;
+        num_gs_stack_entries = 0;
+        num_es_stack_entries = 0;
+        break;
+    }
+
+    /* VC_ENABLE is cleared for RV610/RV620/RS780/RS880/RV710 and set for
+     * every other family (presumably those parts have no separate vertex
+     * cache -- TODO confirm against the hardware documentation). */
+    sq_config = 0;
+    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+        CLEARbit(sq_config, VC_ENABLE_bit);
+    else
+        SETbit(sq_config, VC_ENABLE_bit);
+    SETbit(sq_config, DX9_CONSTS_bit);
+    SETbit(sq_config, ALU_INST_PREFER_VECTOR_bit);
+    SETfield(sq_config, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
+    SETfield(sq_config, vs_prio, VS_PRIO_shift, VS_PRIO_mask);
+    SETfield(sq_config, gs_prio, GS_PRIO_shift, GS_PRIO_mask);
+    SETfield(sq_config, es_prio, ES_PRIO_shift, ES_PRIO_mask);
+
+    sq_gpr_resource_mgmt_1 = 0;
+    SETfield(sq_gpr_resource_mgmt_1, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+    SETfield(sq_gpr_resource_mgmt_1, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+    SETfield(sq_gpr_resource_mgmt_1, num_temp_gprs,
+             NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
+
+    sq_gpr_resource_mgmt_2 = 0;
+    SETfield(sq_gpr_resource_mgmt_2, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
+    SETfield(sq_gpr_resource_mgmt_2, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
+
+    sq_thread_resource_mgmt = 0;
+    SETfield(sq_thread_resource_mgmt, num_ps_threads,
+             NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+    SETfield(sq_thread_resource_mgmt, num_vs_threads,
+             NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+    SETfield(sq_thread_resource_mgmt, num_gs_threads,
+             NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
+    SETfield(sq_thread_resource_mgmt, num_es_threads,
+             NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
+
+    sq_stack_resource_mgmt_1 = 0;
+    SETfield(sq_stack_resource_mgmt_1, num_ps_stack_entries,
+             NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+    SETfield(sq_stack_resource_mgmt_1, num_vs_stack_entries,
+             NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+
+    sq_stack_resource_mgmt_2 = 0;
+    SETfield(sq_stack_resource_mgmt_2, num_gs_stack_entries,
+             NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
+    SETfield(sq_stack_resource_mgmt_2, num_es_stack_entries,
+             NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
+
+    ta_cntl_aux = 0;
+    SETfield(ta_cntl_aux, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
+    db_watermarks = 0;
+    SETfield(db_watermarks, 4, DEPTH_FREE_shift, DEPTH_FREE_mask);
+    SETfield(db_watermarks, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask);
+    SETfield(db_watermarks, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask);
+    SETfield(db_watermarks, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask);
+    sq_dyn_gpr_cntl_ps_flush_req = 0;
+    db_debug = 0;
+    /* NOTE(review): this comparison relies on the ordering of the
+     * CHIP_FAMILY_* values to separate the families below RV770 from the
+     * rest -- verify against the enum definition. */
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+        SETfield(ta_cntl_aux, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+        db_debug = 0x82000000;
+        SETfield(db_watermarks, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+    } else {
+        SETfield(ta_cntl_aux, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+        SETfield(db_watermarks, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+        SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
+    }
+
+    BEGIN_BATCH_NO_AUTOSTATE(117); /* same figure r600_blit() budgets for this function */
+    R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
+    R600_OUT_BATCH(sq_config);
+    R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
+    R600_OUT_BATCH(sq_gpr_resource_mgmt_2);
+    R600_OUT_BATCH(sq_thread_resource_mgmt);
+    R600_OUT_BATCH(sq_stack_resource_mgmt_1);
+    R600_OUT_BATCH(sq_stack_resource_mgmt_2);
+
+    R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, ta_cntl_aux);
+    R600_OUT_BATCH_REGVAL(VC_ENHANCE, 0);
+    R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, sq_dyn_gpr_cntl_ps_flush_req);
+    R600_OUT_BATCH_REGVAL(DB_DEBUG, db_debug);
+    R600_OUT_BATCH_REGVAL(DB_WATERMARKS, db_watermarks);
+
+    R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9); /* nine values follow, all zero */
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL,
+                          (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
+    R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
+    R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
+    R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
+    R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
+    R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
+    R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
+    R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+    R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* presumably ROP3 0xcc = plain source copy -- confirm */
+
+    R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+    R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0);
+    R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+    R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) |
+                          (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+                          (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift));
+    R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
+                          (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+                          (X_1_256TH << QUANT_MODE_shift));
+
+    R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
+    R600_OUT_BATCH(2048);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13); /* thirteen values follow, all zero */
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0); +
R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, 0);
+    R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, 0);
+    R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, 0);
+    R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, 0);
+
+    R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/*
+ * Register the buffers used by a blit with the command stream and check
+ * that they fit.
+ *
+ * src_bo is added with RADEON_GEM_DOMAIN_VRAM in the first domain argument,
+ * dst_bo with RADEON_GEM_DOMAIN_VRAM in the second, and rmesa->blit_bo with
+ * RADEON_GEM_DOMAIN_GTT in the first (presumably read domain and write
+ * domain respectively -- confirm against the radeon_cs API).  Space is then
+ * checked for blit_bo and for the first buffer on the dma.reserved list.
+ *
+ * Returns GL_TRUE when both space checks return 0, GL_FALSE otherwise.
+ *
+ * NOTE(review): first_elem(&rmesa->radeon.dma.reserved)->bo is dereferenced
+ * without checking that the list is non-empty -- confirm that a reserved
+ * DMA buffer always exists at this point.
+ */
+static GLboolean validate_buffers(context_t *rmesa,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    int ret;
+    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+                                      src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+
+    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+                                      dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+    radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+                                      rmesa->blit_bo, RADEON_GEM_DOMAIN_GTT, 0);
+
+    ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                        rmesa->blit_bo,
+                                        RADEON_GEM_DOMAIN_GTT, 0);
+    if (ret)
+        return GL_FALSE;
+
+    ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                        first_elem(&rmesa->radeon.dma.reserved)->bo,
+                                        RADEON_GEM_DOMAIN_GTT, 0);
+    if (ret)
+        return GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/*
+ * Copy a w x h rectangle from one buffer object to another by drawing a
+ * textured rectangle.
+ *
+ * ctx             GL context; the driver context is R700_CONTEXT(ctx).
+ * src_bo          source buffer, bound as the texture.
+ * src_offset      byte offset of the source image; must be a multiple of 256.
+ * src_mesaformat  source format, passed to set_tex_resource().
+ * src_pitch, src_width, src_height
+ *                 source layout, passed to set_tex_resource(); src_height
+ *                 is also the mirror height used when flip_y is set.
+ * src_x, src_y    origin of the rectangle in the source.
+ * dst_bo          destination buffer, bound as the render target; must not
+ *                 be the same object as src_bo.
+ * dst_offset      byte offset of the destination; must be a multiple of 256.
+ * dst_mesaformat  destination format; must pass is_blit_supported().
+ * dst_pitch, dst_width, dst_height
+ *                 destination layout, passed to set_render_target().
+ * dst_x, dst_y    origin of the rectangle in the destination.
+ * w, h            size of the rectangle.
+ * flip_y          non-zero mirrors the source vertically (see set_vb_data()).
+ *
+ * Returns GL_TRUE after the blit has been emitted and flushed.  Returns
+ * GL_FALSE, without drawing, when the destination format is unsupported,
+ * src_bo == dst_bo, either offset is not 256-aligned, or validate_buffers()
+ * fails.
+ */
+unsigned r600_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x,
+                   unsigned src_y,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x,
+                   unsigned dst_y,
+                   unsigned w,
+                   unsigned h,
+                   unsigned flip_y)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    int id = 0; /* only used to form the sync flag passed to r700SyncSurf() below */
+
+    if (!is_blit_supported(dst_mesaformat))
+        return GL_FALSE;
+
+    if (src_bo == dst_bo) {
+        return GL_FALSE;
+    }
+
+    if (src_offset % 256 || dst_offset % 256) {
+        return GL_FALSE;
+    }
+
+    if (0) { /* debug dump, disabled */
+        fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n",
+                src_width, src_height, src_pitch,
+                _mesa_format_row_stride(src_mesaformat, src_width),
+                _mesa_get_format_name(src_mesaformat));
+        fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n",
+                dst_width, dst_height,
+                _mesa_format_row_stride(dst_mesaformat, dst_width),
+                _mesa_get_format_name(dst_mesaformat));
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(ctx);
+
+    rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__); /* 304 = sum of the per-step figures noted below */
+
+    /* load shaders */
+    load_shaders(context->radeon.glCtx);
+
+    if (!validate_buffers(context, src_bo, dst_bo))
+        return GL_FALSE;
+
+    /* set clear state */
+    /* 117 */
+    set_default_state(context);
+
+    /* shaders */
+    /* 72 */
+    set_shaders(context);
+
+    /* src */
+    /* 20 */
+    set_tex_resource(context, src_mesaformat, src_bo,
+                     src_width, src_height, src_pitch, src_offset);
+
+    /* 5 */
+    set_tex_sampler(context);
+
+    /* dst */
+    /* 27 */
+    set_render_target(context, dst_bo, dst_mesaformat,
+                      dst_pitch, dst_width, dst_height, dst_offset);
+    /* scissors */
+    /* 17 */
+    /* NOTE(review): the scissor is sized from dst_width/dst_height added to
+     * the destination origin, not from w/h -- confirm this is intended. */
+    set_scissors(context, dst_x, dst_y, dst_x + dst_width, dst_y + dst_height);
+
+    set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y);
+    /* Vertex buffer setup */
+    /* 24 */
+    set_vtx_resource(context);
+
+    /* draw */
+    /* 10 */
+    draw_auto(context);
+
+    /* 7 */
+    r700SyncSurf(context, dst_bo, 0,
+                 RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT,
+                 CB_ACTION_ENA_bit | (1 << (id + 6))); /* with id == 0 this sets bit 6; presumably the per-colour-buffer enable -- TODO confirm */
+
+    /* 5 */
+    r700WaitForIdleClean(context);
+
+    radeonFlush(ctx);
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/r600_blit.h b/src/mesa/drivers/dri/r600/r600_blit.h
new file mode 100644
index 0000000000..f280e23489
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit.h
@@ -0,0 +1,21 @@
+unsigned r600_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned w,
+                   unsigned h,
+                   unsigned flip_y);
+
diff --git a/src/mesa/drivers/dri/r600/r600_blit_shaders.h b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
new file mode 100644
index 0000000000..492dde9636
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
@@ -0,0 +1,28 @@
+const uint32_t r6xx_vs[] = // NOTE(review): non-static definition in a header; only safe while a single .c file includes it
+{
+    0x00000004, // CF_DWORD0(ADDR(4))
+    0x81000000, // SQ_CF_INST_VTX COUNT(1)
+    0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0)
+    0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1)
+    0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0)
+    0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0)
+    0x00000000,
+    0x00000000,
+    0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16)
+    0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED
+    0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1)
+    0x00000000, // VTX_DWORD_PAD
+};
+
+const uint32_t r6xx_ps[] =
+{
+    0x00000002, // CF_DWORD0 ADDR(2)
+    0x80800000, // SQ_CF_INST_TEX COUNT(1)
+    0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0)
+    0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW
+    0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0)
+    0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED
+    0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW
+    0x00000000, // TEX_DWORD_PAD
+};
+
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index cb549497f5..68112c49dc 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -65,6 +65,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r600_emit.h" #include "radeon_bocs_wrapper.h" #include "radeon_queryobj.h" +#include "r600_blit.h" #include "r700_state.h" #include "r700_ioctl.h" @@ -240,6 +241,7 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r600_fallback; radeon->vtbl.emit_query_finish = r600_emit_query_finish; + radeon->vtbl.blit = r600_blit; } static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) @@ -378,7 +380,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r700InitStateFuncs(&functions); - r600InitTextureFuncs(&functions); + r600InitTextureFuncs(&r600->radeon, &functions); r700InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); r700InitIoctlFuncs(&functions); diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index a1b4af715e..72c8c869b7 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -148,6 +148,8 @@ struct r600_context { GLint nNumActiveAos; StreamDesc stream_desc[VERT_ATTRIB_MAX]; struct r700_index_buffer ind_buf; + struct radeon_bo *blit_bo; + GLboolean blit_bo_loaded; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -178,6 +180,8 @@ extern GLboolean r700SyncSurf(context_t *context, uint32_t write_domain, uint32_t sync_type); +extern void r700WaitForIdleClean(context_t *context); + extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); extern void r700InitDraw(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index f745fe3e8a..71dfd7e059 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -396,7 +396,7 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx, return &t->base; } -void r600InitTextureFuncs(struct dd_function_table 
*functions) +void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. @@ -424,6 +424,11 @@ void r600InitTextureFuncs(struct dd_function_table *functions) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); diff --git a/src/mesa/drivers/dri/r600/r600_tex.h b/src/mesa/drivers/dri/r600/r600_tex.h index fb0e1a023e..c2141ef5e5 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.h +++ b/src/mesa/drivers/dri/r600/r600_tex.h @@ -58,6 +58,6 @@ extern void r600SetTexOffset(__DRIcontext *pDRICtx, GLint texname, extern GLboolean r600ValidateBuffers(GLcontext * ctx); -extern void r600InitTextureFuncs(struct dd_function_table *functions); +extern void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions); #endif /* __r600_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 0ff16b4ddd..c01b2fbb14 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4469,7 +4469,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) } pAsm->D2.dst2.SaturateMode = 1; - pAsm->S[0].src.rtype = pAsm->D.dst.rtype; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = pAsm->D.dst.reg; noswizzle_PVSSRC(&(pAsm->S[0].src)); noneg_PVSSRC(&(pAsm->S[0].src)); @@ -5090,15 +5090,15 @@ void add_return_inst(r700_AssemblerBase *pAsm) { if(GL_FALSE == add_cf_instruction(pAsm) ) { - return GL_FALSE; + return; } //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; 
pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; - pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; @@ -5302,7 +5302,7 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm, GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) { - GLfloat fLiteral[2] = {0.1, 0.0}; + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ pAsm->D.dst.opcode = SQ_OP2_INST_MOV; pAsm->D.dst.op3 = 0; @@ -5353,7 +5353,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) GLboolean testFlag(r700_AssemblerBase *pAsm) { - GLfloat fLiteral[2] = {0.1, 0.0}; + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ //Test flag GLuint tmp = gethelpr(pAsm); @@ -6123,7 +6123,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm, R700ControlFlowGenericClause* prelude_cf_ptr = NULL; - /* copy srcs to presub inputs */ + /* copy srcs to presub inputs */ pAsm->alu_x_opcode = SQ_CF_INST_ALU; for(i=0; i<uNumValidSrc; i++) { diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 56baf5b0d9..0064d0814f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -619,6 +619,7 @@ GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); +GLboolean assemble_CONT(r700_AssemblerBase *pAsm); GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); GLboolean assemble_LOGIC_PRED(r700_AssemblerBase 
*pAsm, BITS opcode); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 3bc2d2ba02..1a1a87c3cf 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -453,13 +453,31 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * R600_OUT_BATCH((2 << id)); END_BATCH(); } + /* Set CMASK & TILE buffer to the offset of color buffer as + * we don't use those this shouldn't cause any issue and we + * then have a valid cmd stream + */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); + R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_BASE.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(18); + BEGIN_BATCH_NO_AUTOSTATE(12); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_TILE + (4 * id), r700->render_target[id].CB_COLOR0_TILE.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_FRAG + (4 * id), r700->render_target[id].CB_COLOR0_FRAG.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index eab27cbd84..3a6210c53a 
100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -422,7 +422,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, } /* start 3d, idle, cb/db flush */ -#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 +#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 18 static GLuint r700PredictRenderSize(GLcontext* ctx, const struct _mesa_prim *prim, diff --git a/src/mesa/drivers/dri/r600/radeon_tex_copy.c b/src/mesa/drivers/dri/r600/radeon_tex_copy.c new file mode 120000 index 0000000000..dfa5ba34e6 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_tex_copy.c @@ -0,0 +1 @@ +../radeon/radeon_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile index 2b2f2c4aa7..c776be0e60 100644 --- a/src/mesa/drivers/dri/radeon/Makefile +++ b/src/mesa/drivers/dri/radeon/Makefile @@ -26,7 +26,8 @@ RADEON_COMMON_SOURCES = \ radeon_mipmap_tree.c \ radeon_queryobj.c \ radeon_span.c \ - radeon_texture.c + radeon_texture.c \ + radeon_tex_copy.c DRIVER_SOURCES = \ radeon_context.c \ @@ -40,6 +41,7 @@ DRIVER_SOURCES = \ radeon_swtcl.c \ radeon_maos.c \ radeon_sanity.c \ + radeon_blit.c \ $(RADEON_COMMON_SOURCES) C_SOURCES = \ diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c new file mode 100644 index 0000000000..0df4fbb33c --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_blit.c @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2010 Advanced Micro Devices, Inc. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "radeon_context.h" +#include "radeon_blit.h" + +static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) +{ + if (count) + return CP_PACKET0(reg, count - 1); + return CP_PACKET2; +} + +/* common formats supported as both textures and render targets */ +static unsigned is_blit_supported(gl_format mesa_format) +{ + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_A8: + break; + default: + return 0; + } + + /* ??? */ + if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) + return 0; + + return 1; +} + +static inline void emit_vtx_state(struct r100_context *r100) +{ + BATCH_LOCALS(&r100->radeon); + + BEGIN_BATCH(8); + if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0); + } else { + OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS); + + } + OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_TEX1_W_ROUTING_USE_W0)); + OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0); + OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_ROUND | + RADEON_ROUND_PREC_4TH_PIX)); + END_BATCH(); +} + +static void inline emit_tx_setup(struct r100_context *r100, + gl_format mesa_format, + struct radeon_bo *bo, + intptr_t offset, + unsigned width, + unsigned height, + unsigned pitch) +{ + uint32_t txformat = RADEON_TXFORMAT_NON_POWER2; 
+ BATCH_LOCALS(&r100->radeon); + + assert(width <= 2047); + assert(height <= 2047); + assert(offset % 32 == 0); + + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_XRGB8888: + txformat |= RADEON_TXFORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + txformat |= RADEON_TXFORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_ARGB1555: + txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + case MESA_FORMAT_A8: + txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP; + break; + default: + break; + } + + BEGIN_BATCH(18); + OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); + OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO | + RADEON_COLOR_ARG_B_ZERO | + RADEON_COLOR_ARG_C_T0_COLOR | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX)); + OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO | + RADEON_ALPHA_ARG_B_ZERO | + RADEON_ALPHA_ARG_C_T0_ALPHA | + RADEON_BLEND_CTL_ADD | + RADEON_CLAMP_TX)); + OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST | + RADEON_CLAMP_T_CLAMP_LAST | + RADEON_MAG_FILTER_NEAREST | + RADEON_MIN_FILTER_NEAREST)); + OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat); + OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) | + ((height - 1) << RADEON_TEX_VSIZE_SHIFT))); + OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32); + + OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1); + OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + + END_BATCH(); +} + +static inline void emit_cb_setup(struct r100_context *r100, + struct radeon_bo *bo, + intptr_t offset, + gl_format mesa_format, + unsigned pitch, + unsigned width, + unsigned height) +{ + uint32_t dst_pitch = pitch; + 
uint32_t dst_format = 0; + BATCH_LOCALS(&r100->radeon); + + /* XXX others? BE/LE? */ + switch (mesa_format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + dst_format = RADEON_COLOR_FORMAT_ARGB8888; + break; + case MESA_FORMAT_RGB565: + dst_format = RADEON_COLOR_FORMAT_RGB565; + break; + case MESA_FORMAT_ARGB4444: + dst_format = RADEON_COLOR_FORMAT_ARGB4444; + break; + case MESA_FORMAT_ARGB1555: + dst_format = RADEON_COLOR_FORMAT_ARGB1555; + break; + case MESA_FORMAT_A8: + dst_format = RADEON_COLOR_FORMAT_RGB8; + break; + default: + break; + } + + BEGIN_BATCH_NO_AUTOSTATE(18); + OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0); + OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) | + (height << RADEON_RE_HEIGHT_SHIFT))); + OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff); + OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); + OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format); + + OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1); + OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1); + OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); +} + +static GLboolean validate_buffers(struct r100_context *r100, + struct radeon_bo *src_bo, + struct radeon_bo *dst_bo) +{ + int ret; + radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs, + src_bo, RADEON_GEM_DOMAIN_VRAM, 0); + + radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs, + dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + + ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, + first_elem(&r100->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Calculate texcoords for given image region. 
+ * Output values are [minx, maxx, miny, maxy] + */ +static inline void calc_tex_coords(float img_width, float img_height, + float x, float y, + float reg_width, float reg_height, + unsigned flip_y, float *buf) +{ + buf[0] = x / img_width; + buf[1] = buf[0] + reg_width / img_width; + buf[2] = y / img_height; + buf[3] = buf[2] + reg_height / img_height; + if (flip_y) + { + buf[2] = 1.0 - buf[2]; + buf[3] = 1.0 - buf[3]; + } +} + +static inline void emit_draw_packet(struct r100_context *r100, + unsigned src_width, unsigned src_height, + unsigned src_x_offset, unsigned src_y_offset, + unsigned dst_x_offset, unsigned dst_y_offset, + unsigned reg_width, unsigned reg_height, + unsigned flip_y) +{ + float texcoords[4]; + float verts[12]; + BATCH_LOCALS(&r100->radeon); + + calc_tex_coords(src_width, src_height, + src_x_offset, src_y_offset, + reg_width, reg_height, + flip_y, texcoords); + + verts[0] = dst_x_offset; + verts[1] = dst_y_offset + reg_height; + verts[2] = texcoords[0]; + verts[3] = texcoords[3]; + + verts[4] = dst_x_offset + reg_width; + verts[5] = dst_y_offset + reg_height; + verts[6] = texcoords[1]; + verts[7] = texcoords[3]; + + verts[8] = dst_x_offset + reg_width; + verts[9] = dst_y_offset; + verts[10] = texcoords[1]; + verts[11] = texcoords[2]; + + BEGIN_BATCH(15); + OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16)); + OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0); + OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING | + RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (3 << 16)); + OUT_BATCH_TABLE(verts, 12); + END_BATCH(); +} + +/** + * Copy a region of [@a width x @a height] pixels from source buffer + * to destination buffer. 
+ * @param[in] r100 r100 context + * @param[in] src_bo source radeon buffer object + * @param[in] src_offset offset of the source image in the @a src_bo + * @param[in] src_mesaformat source image format + * @param[in] src_pitch aligned source image width + * @param[in] src_width source image width + * @param[in] src_height source image height + * @param[in] src_x_offset x offset in the source image + * @param[in] src_y_offset y offset in the source image + * @param[in] dst_bo destination radeon buffer object + * @param[in] dst_offset offset of the destination image in the @a dst_bo + * @param[in] dst_mesaformat destination image format + * @param[in] dst_pitch aligned destination image width + * @param[in] dst_width destination image width + * @param[in] dst_height destination image height + * @param[in] dst_x_offset x offset in the destination image + * @param[in] dst_y_offset y offset in the destination image + * @param[in] width region width + * @param[in] height region height + * @param[in] flip_y set if y coords of the source image need to be flipped + */ +unsigned r100_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y) +{ + struct r100_context *r100 = R100_CONTEXT(ctx); + + if (!is_blit_supported(dst_mesaformat)) + return GL_FALSE; + + /* Make sure that colorbuffer has even width - hw limitation */ + if (dst_pitch % 2 > 0) + ++dst_pitch; + + /* Rendering to small buffer doesn't work. + * Looks like a hw limitation. 
+ */ + if (dst_pitch < 32) + return GL_FALSE; + + /* Need to clamp the region size to make sure + * we don't read outside of the source buffer + * or write outside of the destination buffer. + */ + if (reg_width + src_x_offset > src_width) + reg_width = src_width - src_x_offset; + if (reg_height + src_y_offset > src_height) + reg_height = src_height - src_y_offset; + if (reg_width + dst_x_offset > dst_width) + reg_width = dst_width - dst_x_offset; + if (reg_height + dst_y_offset > dst_height) + reg_height = dst_height - dst_y_offset; + + if (src_bo == dst_bo) { + return GL_FALSE; + } + + if (src_offset % 32 || dst_offset % 32) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "src: size [%d x %d], pitch %d, " + "offset [%d x %d], format %s, bo %p\n", + src_width, src_height, src_pitch, + src_x_offset, src_y_offset, + _mesa_get_format_name(src_mesaformat), + src_bo); + fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n", + dst_pitch, dst_x_offset, dst_y_offset, + _mesa_get_format_name(dst_mesaformat), dst_bo); + fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); + } + + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(ctx); + + rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__); + + if (!validate_buffers(r100, src_bo, dst_bo)) + return GL_FALSE; + + /* 8 */ + emit_vtx_state(r100); + /* 18 */ + emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); + /* 18 */ + emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); + /* 15 */ + emit_draw_packet(r100, src_width, src_height, + src_x_offset, src_y_offset, + dst_x_offset, dst_y_offset, + reg_width, reg_height, + flip_y); + + radeonFlush(ctx); + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.h b/src/mesa/drivers/dri/radeon/radeon_blit.h new file mode 100644 index 0000000000..d36366ff79 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_blit.h @@ 
-0,0 +1,54 @@ +/* + * Copyright (C) 2010 Advanced Micro Devices, Inc. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef RADEON_BLIT_H +#define RADEON_BLIT_H + +void r100_blit_init(struct r100_context *r100); + +unsigned r100_blit(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned width, + unsigned height, + unsigned flip_y); + +#endif // RADEON_BLIT_H diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index ab79d2dc0f..e397ee8c22 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -518,6 +518,26 @@ struct radeon_context { void (*free_context)(GLcontext *ctx); void (*emit_query_finish)(radeonContextPtr radeon); void (*update_scissor)(GLcontext *ctx); + unsigned (*blit)(GLcontext *ctx, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned reg_width, + unsigned reg_height, + unsigned flip_y); } vtbl; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 3cd305b0a2..6c08a90bbd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "radeon_tcl.h" #include "radeon_maos.h" #include "radeon_queryobj.h" +#include "radeon_blit.h" #define need_GL_ARB_occlusion_query #define need_GL_EXT_blend_minmax @@ -202,6 +203,7 @@ static void r100_init_vtbl(radeonContextPtr radeon) radeon->vtbl.fallback = radeonFallback; radeon->vtbl.free_context = r100_vtbl_free_context; radeon->vtbl.emit_query_finish = r100_emit_query_finish; + radeon->vtbl.blit = r100_blit; } /* Create the device specific context. @@ -228,6 +230,7 @@ r100CreateContext( const __GLcontextModes *glVisual, if ( !rmesa ) return GL_FALSE; + rmesa->radeon.radeonScreen = screen; r100_init_vtbl(&rmesa->radeon); /* init exp fog table data */ @@ -257,7 +260,7 @@ r100CreateContext( const __GLcontextModes *glVisual, * (the texture functions are especially important) */ _mesa_init_driver_functions( &functions ); - radeonInitTextureFuncs( &functions ); + radeonInitTextureFuncs( &rmesa->radeon, &functions ); radeonInitQueryObjFunctions(&functions); if (!radeonInitContext(&rmesa->radeon, &functions, diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index dfedc38bfd..d84760bf74 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -453,7 +453,6 @@ struct r100_context { extern GLboolean r100CreateContext( const __GLcontextModes *glVisual, __DRIcontext *driContextPriv, void *sharedContextPrivate); - #endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 14163f13af..882ee5c194 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -434,7 +434,7 @@ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) -void radeonInitTextureFuncs( struct dd_function_table *functions ) +void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { functions->ChooseTextureFormat = 
radeonChooseTextureFormat_mesa; functions->TexImage1D = radeonTexImage1D; @@ -455,6 +455,11 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyTexImage2D = radeonCopyTexImage2D; + functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; + } + functions->GenerateMipmap = radeonGenerateMipmap; functions->NewTextureImage = radeonNewTextureImage; diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h index a4aaddc74f..0113ffd3da 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.h +++ b/src/mesa/drivers/dri/radeon/radeon_tex.h @@ -52,6 +52,6 @@ extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t, extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t ); -extern void radeonInitTextureFuncs( struct dd_function_table *functions ); +extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); #endif /* __RADEON_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_texcopy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index ebc9c05b8a..44e144c80f 100644 --- a/src/mesa/drivers/dri/r300/r300_texcopy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -26,7 +26,7 @@ */ #include "radeon_common.h" -#include "r300_context.h" +#include "radeon_texture.h" #include "main/image.h" #include "main/teximage.h" @@ -34,11 +34,8 @@ #include "drivers/common/meta.h" #include "radeon_mipmap_tree.h" -#include "r300_blit.h" #include <main/debug.h> -// TODO: -// need to pass correct pitch for small dst textures! 
static GLboolean do_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, @@ -48,13 +45,13 @@ do_copy_texsubimage(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height) { - struct r300_context *r300 = R300_CONTEXT(ctx); + radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_renderbuffer *rrb; if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) { - rrb = radeon_get_depthbuffer(&r300->radeon); + rrb = radeon_get_depthbuffer(radeon); } else { - rrb = radeon_get_colorbuffer(&r300->radeon); + rrb = radeon_get_colorbuffer(radeon); } if (!timg->mt) { @@ -69,10 +66,6 @@ do_copy_texsubimage(GLcontext *ctx, intptr_t src_offset = rrb->draw_offset; intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level); - if (src_offset % 32 || dst_offset % 32) { - return GL_FALSE; - } - if (0) { fprintf(stderr, "%s: copying to face %d, level %d\n", __FUNCTION__, _mesa_tex_target_to_face(target), level); @@ -84,18 +77,19 @@ do_copy_texsubimage(GLcontext *ctx, } /* blit from src buffer to texture */ - return r300_blit(r300, rrb->bo, src_offset, rrb->base.Format, rrb->pitch/rrb->cpp, - rrb->base.Width, rrb->base.Height, x, y, - timg->mt->bo, dst_offset, timg->base.TexFormat, - timg->base.Width, timg->base.Width, timg->base.Height, - dstx, dsty, width, height, 1); + return radeon->vtbl.blit(ctx, rrb->bo, src_offset, rrb->base.Format, rrb->pitch/rrb->cpp, + rrb->base.Width, rrb->base.Height, x, y, + timg->mt->bo, dst_offset, timg->base.TexFormat, + timg->mt->levels[level].rowstride / _mesa_get_format_bytes(timg->base.TexFormat), + timg->base.Width, timg->base.Height, + dstx, dsty, width, height, 1); } -static void -r300CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) +void +radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, 
+ GLint border) { struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = @@ -139,11 +133,11 @@ fail: width, height, border); } -static void -r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLint x, GLint y, - GLsizei width, GLsizei height) +void +radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) { struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); @@ -159,10 +153,3 @@ r300CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, xoffset, yoffset, x, y, width, height); } } - - -void r300_init_texcopy_functions(struct dd_function_table *table) -{ - table->CopyTexImage2D = r300CopyTexImage2D; - table->CopyTexSubImage2D = r300CopyTexSubImage2D; -}
\ No newline at end of file diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 03178116c1..20a27ad9a7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -197,21 +197,6 @@ void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj) radeon_bo_unmap(t->mt->bo); } -GLuint radeon_face_for_target(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - default: - return 0; - } -} - /** * Wraps Mesa's implementation to ensure that the base level image is mapped. * @@ -248,7 +233,7 @@ static void radeon_generate_mipmap(GLcontext *ctx, GLenum target, void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj) { - GLuint face = radeon_face_for_target(target); + GLuint face = _mesa_tex_target_to_face(target); radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]); radeon_teximage_map(baseimage, GL_FALSE); @@ -710,7 +695,7 @@ static void radeon_teximage( radeon_texture_image* image = get_radeon_texture_image(texImage); GLint postConvWidth = width; GLint postConvHeight = height; - GLuint face = radeon_face_for_target(target); + GLuint face = _mesa_tex_target_to_face(target); { struct radeon_bo *bo; @@ -863,7 +848,7 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve if (RADEON_DEBUG & RADEON_TEXTURE) { fprintf(stderr, "radeon_texsubimage%dd: texObj %p, texImage %p, face %d, level %d\n", - dims, texObj, texImage, radeon_face_for_target(target), level); + dims, texObj, texImage, _mesa_tex_target_to_face(target), level); } t->validated = GL_FALSE; diff --git 
a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 906daf12d0..f09dd65214 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -44,7 +44,6 @@ void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj); void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj); int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj); -GLuint radeon_face_for_target(GLenum target); gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx, GLint internalFormat, @@ -126,4 +125,14 @@ void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage); +void radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border); + +void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height); + #endif diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index e81d7fdcd0..1b33de1edf 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -1959,7 +1959,30 @@ #define RADEON_SE_ZBIAS_FACTOR 0x1db0 #define RADEON_SE_ZBIAS_CONSTANT 0x1db4 - +#define RADEON_SE_VTX_FMT 0x2080 +# define RADEON_SE_VTX_FMT_XY 0x00000000 +# define RADEON_SE_VTX_FMT_W0 0x00000001 +# define RADEON_SE_VTX_FMT_FPCOLOR 0x00000002 +# define RADEON_SE_VTX_FMT_FPALPHA 0x00000004 +# define RADEON_SE_VTX_FMT_PKCOLOR 0x00000008 +# define RADEON_SE_VTX_FMT_FPSPEC 0x00000010 +# define RADEON_SE_VTX_FMT_FPFOG 0x00000020 +# define RADEON_SE_VTX_FMT_PKSPEC 0x00000040 +# define RADEON_SE_VTX_FMT_ST0 
0x00000080 +# define RADEON_SE_VTX_FMT_ST1 0x00000100 +# define RADEON_SE_VTX_FMT_Q1 0x00000200 +# define RADEON_SE_VTX_FMT_ST2 0x00000400 +# define RADEON_SE_VTX_FMT_Q2 0x00000800 +# define RADEON_SE_VTX_FMT_ST3 0x00001000 +# define RADEON_SE_VTX_FMT_Q3 0x00002000 +# define RADEON_SE_VTX_FMT_Q0 0x00004000 +# define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK 0x00038000 +# define RADEON_SE_VTX_FMT_N0 0x00040000 +# define RADEON_SE_VTX_FMT_XY1 0x08000000 +# define RADEON_SE_VTX_FMT_Z1 0x10000000 +# define RADEON_SE_VTX_FMT_W1 0x20000000 +# define RADEON_SE_VTX_FMT_N1 0x40000000 +# define RADEON_SE_VTX_FMT_Z 0x80000000 /* Registers for CP and Microcode Engine */ #define RADEON_CP_ME_RAM_ADDR 0x07d4 diff --git a/src/mesa/glapi/gl_XML.py b/src/mesa/glapi/gl_XML.py index bafb00306f..a10a35e513 100644 --- a/src/mesa/glapi/gl_XML.py +++ b/src/mesa/glapi/gl_XML.py @@ -184,7 +184,7 @@ class gl_print_base: The name is also added to the file's undef_list. """ self.undef_list.append("PURE") - print """# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) + print """# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PURE __attribute__((pure)) # else # define PURE @@ -224,7 +224,7 @@ class gl_print_base: """ self.undef_list.append(S) - print """# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) + print """# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define %s __attribute__((visibility("%s"))) # else # define %s @@ -244,7 +244,7 @@ class gl_print_base: """ self.undef_list.append("NOINLINE") - print """# if defined(__GNUC__) + print """# if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define NOINLINE __attribute__((noinline)) # else # define NOINLINE diff --git a/src/mesa/glapi/glapitemp.h b/src/mesa/glapi/glapitemp.h index 6767a07673..b8bfcc1a16 100644 --- 
a/src/mesa/glapi/glapitemp.h +++ b/src/mesa/glapi/glapitemp.h @@ -27,7 +27,7 @@ */ -# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) && defined(__ELF__) +# if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined(__ELF__) # define HIDDEN __attribute__((visibility("hidden"))) # else # define HIDDEN diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 4eb249b4af..9eab1ead24 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -173,7 +173,8 @@ extern "C" { * We also need to define a USED attribute, so the optimizer doesn't * inline a static function that we later use in an alias. - ajax */ -#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +#if (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \ + || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) # define PUBLIC __attribute__((visibility("default"))) # define USED __attribute__((used)) #else diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 7b3599f932..4da245ab49 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -861,6 +861,9 @@ _mesa_GenRenderbuffersEXT(GLsizei n, GLuint *renderbuffers) * * \return one of GL_RGB, GL_RGBA, GL_STENCIL_INDEX, GL_DEPTH_COMPONENT * GL_DEPTH_STENCIL_EXT or zero if error. + * + * XXX in the future when we support red-only and red-green formats + * we'll also return GL_RED and GL_RG. 
*/ GLenum _mesa_base_fbo_format(GLcontext *ctx, GLenum internalFormat) diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 77153889b6..d70cf877e8 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -57,7 +57,7 @@ void st_upload_constants( struct st_context *st, unsigned shader_type) { struct pipe_context *pipe = st->pipe; - struct pipe_constant_buffer *cbuf = &st->state.constants[shader_type]; + struct pipe_buffer **cbuf = &st->state.constants[shader_type]; assert(shader_type == PIPE_SHADER_VERTEX || shader_type == PIPE_SHADER_FRAGMENT); @@ -71,8 +71,8 @@ void st_upload_constants( struct st_context *st, /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_buffer_reference(&cbuf->buffer, NULL ); - cbuf->buffer = pipe_buffer_create(pipe->screen, 16, + pipe_buffer_reference(cbuf, NULL ); + *cbuf = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); @@ -84,12 +84,12 @@ void st_upload_constants( struct st_context *st, } /* load Mesa constants into the constant buffer */ - if (cbuf->buffer) - st_no_flush_pipe_buffer_write(st, cbuf->buffer, + if (cbuf) + st_no_flush_pipe_buffer_write(st, *cbuf, 0, paramBytes, params->ParameterValues); - st->pipe->set_constant_buffer(st->pipe, shader_type, 0, cbuf); + st->pipe->set_constant_buffer(st->pipe, shader_type, 0, *cbuf); } else { st->constants.tracked_state[shader_type].dirty.mesa = 0x0; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 46c8cbb309..176f3ea68d 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -35,8 +35,6 @@ * Brian Paul */ - - #include "main/imports.h" #include "main/mtypes.h" #include "main/macros.h" @@ -57,9 +55,7 @@ - - -/* +/** * Translate fragment program if needed. 
*/ static void @@ -155,8 +151,10 @@ find_translated_vp(struct st_context *st, } - - +/** + * Return pointer to a pass-through fragment shader. + * This shader is used when a texture is missing/incomplete. + */ static void * get_passthrough_fs(struct st_context *st) { @@ -168,6 +166,11 @@ get_passthrough_fs(struct st_context *st) return st->passthrough_fs; } + +/** + * Update fragment program state/atom. This involves translating the + * Mesa fragment program into a gallium fragment program and binding it. + */ static void update_fp( struct st_context *st ) { @@ -191,6 +194,7 @@ update_fp( struct st_context *st ) } } + const struct st_tracked_state st_update_fp = { "st_update_fp", /* name */ { /* dirty */ @@ -202,7 +206,10 @@ const struct st_tracked_state st_update_fp = { - +/** + * Update vertex program state/atom. This involves translating the + * Mesa vertex program into a gallium vertex program and binding it. + */ static void update_vp( struct st_context *st ) { diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 145bd62b83..9e6ce30db0 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -218,8 +218,8 @@ static void st_destroy_context_priv( struct st_context *st ) } for (i = 0; i < Elements(st->state.constants); i++) { - if (st->state.constants[i].buffer) { - pipe_buffer_reference(&st->state.constants[i].buffer, NULL); + if (st->state.constants[i]) { + pipe_buffer_reference(&st->state.constants[i], NULL); } } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 831909a3f8..2c4943cfb0 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -92,7 +92,7 @@ struct st_context struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_buffer *constants[2]; struct 
pipe_framebuffer_state framebuffer; struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; struct pipe_scissor_state scissor; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index e54f21be60..b0d5b993a7 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -365,6 +365,7 @@ setup_interleaved_attribs(GLcontext *ctx, velements[attr].src_offset = (unsigned) (arrays[mesaAttr]->Ptr - offset0); + velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = 0; velements[attr].nr_components = arrays[mesaAttr]->Size; velements[attr].src_format = @@ -454,6 +455,7 @@ setup_non_interleaved_attribs(GLcontext *ctx, /* common-case setup */ vbuffer[attr].stride = stride; /* in bytes */ vbuffer[attr].max_index = max_index; + velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].nr_components = arrays[mesaAttr]->Size; velements[attr].src_format @@ -522,7 +524,6 @@ st_draw_vbo(GLcontext *ctx, struct pipe_context *pipe = ctx->st->pipe; const struct st_vertex_program *vp; const struct st_vp_varient *vpv; - const struct pipe_shader_state *vs; struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; GLuint attr; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; @@ -550,7 +551,6 @@ st_draw_vbo(GLcontext *ctx, /* must get these after state validation! 
*/ vp = ctx->st->vp; vpv = ctx->st->vp_varient; - vs = &vpv->state; #if 0 if (MESA_VERBOSE & VERBOSE_GLSL) { diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index cfc0caac98..a05d6dd06b 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -177,6 +177,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* common-case setup */ vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ vbuffers[attr].max_index = max_index; + velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].nr_components = arrays[mesaAttr]->Size; velements[attr].src_format = @@ -239,11 +240,11 @@ st_feedback_draw_vbo(GLcontext *ctx, /* map constant buffers */ mapped_constants = pipe_buffer_map(pipe->screen, - st->state.constants[PIPE_SHADER_VERTEX].buffer, + st->state.constants[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_constant_buffer(st->draw, PIPE_SHADER_VERTEX, mapped_constants, - st->state.constants[PIPE_SHADER_VERTEX].buffer->size); + st->state.constants[PIPE_SHADER_VERTEX]->size); /* draw here */ @@ -253,7 +254,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* unmap constant buffers */ - pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer); + pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX]); /* * unmap vertex/index buffers diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index e25a613d8a..2a5fb27d8f 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -168,6 +168,7 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.EXT_blend_subtract = GL_TRUE; ctx->Extensions.EXT_framebuffer_blit = GL_TRUE; ctx->Extensions.EXT_framebuffer_object = GL_TRUE; + ctx->Extensions.EXT_framebuffer_multisample = GL_TRUE; ctx->Extensions.EXT_fog_coord = GL_TRUE; ctx->Extensions.EXT_multi_draw_arrays = 
GL_TRUE; ctx->Extensions.EXT_pixel_buffer_object = GL_TRUE; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index e788008dfe..05b56c9b58 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -48,6 +48,10 @@ struct label { unsigned token; }; + +/** + * Intermediate state used during shader translation. + */ struct st_translate { struct ureg_program *ureg; @@ -730,6 +734,7 @@ emit_face_var( struct st_translate *t, t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); } + static void emit_edgeflags( struct st_translate *t, const struct gl_program *program ) @@ -741,6 +746,7 @@ emit_edgeflags( struct st_translate *t, ureg_MOV( ureg, edge_dst, edge_src ); } + /** * Translate Mesa program to TGSI format. * \param program the program to translate @@ -758,7 +764,7 @@ emit_edgeflags( struct st_translate *t, * \param outputSemanticIndex the semantic index (ex: which texcoord) for * each output * - * \return array of translated tokens, caller's responsibility to free + * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY */ enum pipe_error st_translate_mesa_program( @@ -779,6 +785,7 @@ st_translate_mesa_program( { struct st_translate translate, *t; unsigned i; + enum pipe_error ret = PIPE_OK; t = &translate; memset(t, 0, sizeof *t); @@ -865,8 +872,10 @@ st_translate_mesa_program( t->constants = CALLOC( program->Parameters->NumParameters, sizeof t->constants[0] ); - if (t->constants == NULL) + if (t->constants == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; + } for (i = 0; i < program->Parameters->NumParameters; i++) { switch (program->Parameters->Parameters[i].Type) { @@ -920,8 +929,6 @@ st_translate_mesa_program( t->insn[t->labels[i].branch_target] ); } - return PIPE_OK; - out: FREE(t->insn); FREE(t->labels); @@ -931,7 +938,7 @@ out: debug_printf("%s: translate error flag set\n", __FUNCTION__); } - return PIPE_ERROR_OUT_OF_MEMORY; + return ret; } diff --git 
a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 6a869fae90..5c87e47ca3 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -44,7 +44,6 @@ #include "st_debug.h" #include "st_context.h" -#include "st_atom.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "cso_cache/cso_context.h" diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 8a3e4cd3ac..b210ac9187 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -35,7 +35,6 @@ #include "main/texfetch.h" #include "main/teximage.h" #include "main/texobj.h" -#include "main/texstore.h" #undef Elements /* fix re-defined macro warning */ diff --git a/src/mesa/swrast/s_accum.c b/src/mesa/swrast/s_accum.c index 0e0876efcb..cf53f01b7c 100644 --- a/src/mesa/swrast/s_accum.c +++ b/src/mesa/swrast/s_accum.c @@ -27,7 +27,6 @@ #include "main/context.h" #include "main/macros.h" #include "main/imports.h" -#include "main/fbobject.h" #include "s_accum.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c index e88ff19123..353e9999d6 100644 --- a/src/mesa/swrast/s_atifragshader.c +++ b/src/mesa/swrast/s_atifragshader.c @@ -23,7 +23,6 @@ #include "main/colormac.h" #include "main/context.h" #include "main/macros.h" -#include "shader/program.h" #include "shader/atifragshader.h" #include "swrast/s_atifragshader.h" diff --git a/src/mesa/swrast/s_bitmap.c b/src/mesa/swrast/s_bitmap.c index 46c63aa645..59e26e9ea3 100644 --- a/src/mesa/swrast/s_bitmap.c +++ b/src/mesa/swrast/s_bitmap.c @@ -33,7 +33,6 @@ #include "main/condrender.h" #include "main/image.h" #include "main/macros.h" -#include "main/pixel.h" #include "s_context.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c index 986b6aff4f..e881d1be30 100644 --- a/src/mesa/swrast/s_copypix.c +++ b/src/mesa/swrast/s_copypix.c @@ 
-28,11 +28,9 @@ #include "main/colormac.h" #include "main/condrender.h" #include "main/convolve.h" -#include "main/histogram.h" #include "main/image.h" #include "main/macros.h" #include "main/imports.h" -#include "main/pixel.h" #include "s_context.h" #include "s_depth.h" diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c index c37a54eb3e..0b6bb7e3ec 100644 --- a/src/mesa/swrast/s_depth.c +++ b/src/mesa/swrast/s_depth.c @@ -28,7 +28,6 @@ #include "main/formats.h" #include "main/macros.h" #include "main/imports.h" -#include "main/fbobject.h" #include "s_depth.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index 55a4c4c3c6..248d6cc1c0 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -31,7 +31,6 @@ #include "main/image.h" #include "main/macros.h" #include "main/imports.h" -#include "main/pixel.h" #include "main/state.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_feedback.c b/src/mesa/swrast/s_feedback.c index 47ed25ee10..2e6066983d 100644 --- a/src/mesa/swrast/s_feedback.c +++ b/src/mesa/swrast/s_feedback.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/colormac.h" #include "main/context.h" -#include "main/enums.h" #include "main/feedback.h" #include "main/macros.h" diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index a22d34415d..9ac33a26a6 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/colormac.h" #include "main/context.h" -#include "main/texstate.h" #include "shader/prog_instruction.h" #include "s_fragprog.h" diff --git a/src/mesa/swrast/s_lines.c b/src/mesa/swrast/s_lines.c index 23cb9b57ef..5411229d70 100644 --- a/src/mesa/swrast/s_lines.c +++ b/src/mesa/swrast/s_lines.c @@ -29,7 +29,6 @@ #include "main/macros.h" #include "s_aaline.h" #include "s_context.h" -#include "s_depth.h" #include "s_feedback.h" #include "s_lines.h" #include 
"s_span.h" diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c index 50ec2063a5..6b955429e9 100644 --- a/src/mesa/swrast/s_points.c +++ b/src/mesa/swrast/s_points.c @@ -27,7 +27,6 @@ #include "main/colormac.h" #include "main/context.h" #include "main/macros.h" -#include "main/texstate.h" #include "s_context.h" #include "s_feedback.h" #include "s_points.h" diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 44a11cd6dd..94fb974eab 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -33,7 +33,6 @@ #include "main/image.h" #include "main/macros.h" #include "main/imports.h" -#include "main/pixel.h" #include "main/state.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 889164b986..594b71a03c 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -29,7 +29,6 @@ #include "main/colormac.h" #include "main/image.h" #include "main/imports.h" -#include "main/pixel.h" #include "shader/prog_instruction.h" #include "s_context.h" diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index db21b4589d..5a14e595a0 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -38,7 +38,6 @@ #include "tnl.h" #include "t_context.h" #include "t_pipeline.h" -#include "t_vp_build.h" #include "vbo/vbo.h" diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index d31b29b9b4..38757a0e28 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -29,15 +29,11 @@ #include "main/condrender.h" #include "main/context.h" #include "main/imports.h" -#include "main/state.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/enums.h" #include "t_context.h" -#include "t_pipeline.h" -#include "t_vp_build.h" -#include "t_vertex.h" #include "tnl.h" diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 01b30babb4..946b29e250 100644 --- a/src/mesa/tnl/t_pipeline.c +++ 
b/src/mesa/tnl/t_pipeline.c @@ -28,7 +28,6 @@ #include "main/glheader.h" #include "main/context.h" #include "main/imports.h" -#include "main/state.h" #include "main/mtypes.h" #include "t_context.h" diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index 99b6787455..13b84a7d77 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -29,7 +29,6 @@ #include "main/feedback.h" #include "main/light.h" #include "main/macros.h" -#include "main/rastpos.h" #include "main/simple_list.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 15a8a67b91..5396548666 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -40,7 +40,6 @@ #include "shader/prog_statevars.h" #include "shader/prog_execute.h" #include "swrast/s_context.h" -#include "swrast/s_texfilter.h" #include "tnl/tnl.h" #include "tnl/t_context.h" diff --git a/src/mesa/vbo/vbo_exec.c b/src/mesa/vbo/vbo_exec.c index e168a89ea5..a057befed0 100644 --- a/src/mesa/vbo/vbo_exec.c +++ b/src/mesa/vbo/vbo_exec.c @@ -28,9 +28,6 @@ #include "main/api_arrayelt.h" #include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/macros.h" #include "main/mtypes.h" #include "main/vtxfmt.h" diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 6de8f059b7..2c82f7c9c5 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -35,7 +35,6 @@ #include "main/bufferobj.h" #include "main/enums.h" #include "main/macros.h" -#include "glapi/dispatch.h" #include "vbo_context.h" diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 4f43856016..d7dbbceb1b 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -30,7 +30,6 @@ #include "main/context.h" #include "main/enums.h" #include "main/state.h" -#include "main/macros.h" #include "vbo_context.h" diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c 
index 9757c3d9f6..10f705cf84 100644 --- a/src/mesa/vbo/vbo_save.c +++ b/src/mesa/vbo/vbo_save.c @@ -28,8 +28,6 @@ #include "main/mtypes.h" #include "main/bufferobj.h" -#include "main/dlist.h" -#include "main/vtxfmt.h" #include "main/imports.h" #include "vbo_context.h" diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c index b7a74e4535..f13a16e3b5 100644 --- a/src/mesa/vbo/vbo_save_loopback.c +++ b/src/mesa/vbo/vbo_save_loopback.c @@ -29,7 +29,6 @@ #include "main/glheader.h" #include "main/enums.h" #include "main/imports.h" -#include "main/macros.h" #include "main/mtypes.h" #include "glapi/dispatch.h" #include "glapi/glapi.h" diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index c45190b9dd..2ca111217c 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -34,7 +34,6 @@ #include "main/imports.h" #include "main/image.h" #include "main/macros.h" -#include "main/enums.h" #include "main/mtypes.h" #include "vbo_split.h" @@ -221,8 +220,6 @@ begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag ) { struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr]; -/* _mesa_printf("begin %s (%d)\n", _mesa_lookup_prim_by_nr(mode), begin_flag); */ - prim->mode = mode; prim->begin = begin_flag; } diff --git a/src/mesa/x86/x86_xform.c b/src/mesa/x86/x86_xform.c index 52f6b25d81..c834e2b468 100644 --- a/src/mesa/x86/x86_xform.c +++ b/src/mesa/x86/x86_xform.c @@ -30,7 +30,6 @@ #include "main/glheader.h" #include "main/context.h" #include "math/m_xform.h" -#include "tnl/t_context.h" #include "x86_xform.h" #include "common_x86_asm.h" |