diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_draw_upload.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw_upload.c | 515 |
1 files changed, 251 insertions, 264 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 6150cac4aa..303eaac5cf 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -27,52 +27,21 @@ #include <stdlib.h> -#include "glheader.h" -#include "context.h" -#include "state.h" -#include "api_validate.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/enums.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" -#include "brw_aub.h" #include "brw_state.h" #include "brw_fallback.h" -#include "intel_ioctl.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" - - -struct brw_array_state { - union header_union header; - - struct { - union { - struct { - GLuint pitch:11; - GLuint pad:15; - GLuint access_type:1; - GLuint vb_index:5; - } bits; - GLuint dword; - } vb0; - - struct buffer *buffer; - GLuint offset; - - GLuint max_index; - GLuint instance_data_step_rate; - - } vb[BRW_VBP_MAX]; -}; - - -static struct buffer *array_buffer( const struct gl_client_array *array ) -{ - return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj)); -} +#include "intel_tex.h" static GLuint double_types[5] = { 0, @@ -247,194 +216,165 @@ static GLuint get_index_type(GLenum type) } } -static void copy_strided_array( GLubyte *dest, - const GLubyte *src, - GLuint size, - GLuint stride, - GLuint count ) -{ - if (size == stride) - do_memcpy(dest, src, count * size); - else { - GLuint i,j; - - for (i = 0; i < count; i++) { - for (j = 0; j < size; j++) - *dest++ = *src++; - src += (stride - size); - } - } -} - static void wrap_buffers( struct brw_context *brw, GLuint size ) { - GLcontext *ctx = &brw->intel.ctx; - if (size < BRW_UPLOAD_INIT_SIZE) size = BRW_UPLOAD_INIT_SIZE; - brw->vb.upload.buf++; - brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS; brw->vb.upload.offset = 0; - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - size, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[brw->vb.upload.buf]); + if (brw->vb.upload.bo != NULL) + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", + size, 1); + + /* Set the internal VBO\ to no-backing-store. We only use them as a + * temporary within a brw_try_draw_prims while the lock is held. + */ + /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH + FAKE TO PUSH THIS STUFF */ +// if (!brw->intel.ttm) +// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); } static void get_space( struct brw_context *brw, GLuint size, - struct gl_buffer_object **vbo_return, + dri_bo **bo_return, GLuint *offset_return ) { - size = (size + 63) & ~63; - - if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE) + size = ALIGN(size, 64); + + if (brw->vb.upload.bo == NULL || + brw->vb.upload.offset + size > brw->vb.upload.bo->size) { wrap_buffers(brw, size); + } - *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf]; + dri_bo_reference(brw->vb.upload.bo); + *bo_return = brw->vb.upload.bo; *offset_return = brw->vb.upload.offset; brw->vb.upload.offset += size; } - - -static struct gl_client_array * +static void copy_array_to_vbo_array( struct brw_context *brw, - GLuint i, - const struct gl_client_array *array, - GLuint element_size, - GLuint count) + struct brw_vertex_element *element, + GLuint dst_stride) { - GLcontext *ctx = &brw->intel.ctx; - struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; - GLuint size = count * element_size; - struct gl_buffer_object *vbo; - GLuint offset; - GLuint new_stride; + GLuint size = element->count * dst_stride; - get_space(brw, size, &vbo, &offset); + get_space(brw, size, &element->bo, &element->offset); - if (array->StrideB == 0) { - assert(count == 1); - new_stride = 0; + if (element->glarray->StrideB == 0) { + assert(element->count == 1); + element->stride = 0; + } else { + element->stride = dst_stride; } - else - new_stride = element_size; - - vbo_array->Size = array->Size; - vbo_array->Type = array->Type; - vbo_array->Stride = new_stride; - vbo_array->StrideB = new_stride; - vbo_array->Ptr = (const void *)offset; - vbo_array->Enabled = 1; - vbo_array->Normalized = array->Normalized; - vbo_array->_MaxElement = array->_MaxElement; /* ? */ - vbo_array->BufferObj = vbo; - - { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - vbo); - - map += offset; - copy_strided_array( map, - array->Ptr, - element_size, - array->StrideB, - count); + if (dst_stride == element->glarray->StrideB) { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } else { + void *data; + char *dest; + const char *src = element->glarray->Ptr; + int i; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj); + dri_bo_subdata(element->bo, + element->offset, + size, + data); + _mesa_free(data); } - - return vbo_array; } - - -static struct gl_client_array * -interleaved_vbo_array( struct brw_context *brw, - GLuint i, - const struct gl_client_array *uploaded_array, - const struct gl_client_array *array, - const char *ptr) -{ - struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; - - vbo_array->Size = array->Size; - vbo_array->Type = array->Type; - vbo_array->Stride = array->Stride; - vbo_array->StrideB = array->StrideB; - vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr + - ((const char *)array->Ptr - ptr)); - vbo_array->Enabled = 1; - vbo_array->Normalized = array->Normalized; - vbo_array->_MaxElement = array->_MaxElement; - vbo_array->BufferObj = uploaded_array->BufferObj; - - return vbo_array; -} - - -GLboolean brw_upload_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ) +static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); GLuint tmp = brw->vs.prog_data->inputs_read; - struct brw_vertex_element_packet vep; - struct brw_array_state vbp; GLuint i; - const void *ptr = NULL; + const unsigned char *ptr = NULL; GLuint interleave = 0; + unsigned int min_index = brw->vb.min_index; + unsigned int max_index = brw->vb.max_index; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint nr_enabled = 0; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; - - - memset(&vbp, 0, sizeof(vbp)); - memset(&vep, 0, sizeof(vep)); /* First build an array of pointers to ve's in vb.inputs_read */ if (0) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - + + /* Accumulate the list of enabled arrays. */ while (tmp) { GLuint i = _mesa_ffsll(tmp)-1; struct brw_vertex_element *input = &brw->vb.inputs[i]; tmp &= ~(1<<i); enabled[nr_enabled++] = input; + } + + /* XXX: In the rare cases where this happens we fallback all + * the way to software rasterization, although a tnl fallback + * would be sufficient. I don't know of *any* real world + * cases with > 17 vertex attributes enabled, so it probably + * isn't an issue at this point. + */ + if (nr_enabled >= BRW_VEP_MAX) { + intel->Fallback = 1; + return; + } + + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; - input->index = i; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (!input->glarray->BufferObj->Name) { + if (input->glarray->BufferObj->Name != 0) { + struct intel_buffer_object *intel_buffer = + intel_buffer_object(input->glarray->BufferObj); + + /* Named buffer object: Just reference its contents directly. */ + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + input->offset = (unsigned long)input->glarray->Ptr; + input->stride = input->glarray->StrideB; + } else { + /* Queue the buffer object up to be uploaded in the next pass, + * when we've decided if we're doing interleaved or not. + */ if (i == 0) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) - return GL_FALSE; + return; interleave = input->glarray->StrideB; ptr = input->glarray->Ptr; } else if (interleave != input->glarray->StrideB || - (const char *)input->glarray->Ptr - (const char *)ptr < 0 || - (const char *)input->glarray->Ptr - (const char *)ptr > interleave) { + (const unsigned char *)input->glarray->Ptr - ptr < 0 || + (const unsigned char *)input->glarray->Ptr - ptr > interleave) + { interleave = 0; } @@ -451,131 +391,131 @@ GLboolean brw_upload_vertices( struct brw_context *brw, } } - /* Upload interleaved arrays if all uploads are interleaved - */ - if (nr_uploads > 1 && - interleave && - interleave <= 256) { - struct brw_vertex_element *input0 = upload[0]; - - input0->glarray = copy_array_to_vbo_array(brw, 0, - input0->glarray, - interleave, - input0->count); + /* Handle any arrays to be uploaded. */ + if (nr_uploads > 1 && interleave && interleave <= 256) { + /* All uploads are interleaved, so upload the arrays together as + * interleaved. First, upload the contents and set up upload[0]. + */ + copy_array_to_vbo_array(brw, upload[0], interleave); for (i = 1; i < nr_uploads; i++) { - upload[i]->glarray = interleaved_vbo_array(brw, - i, - input0->glarray, - upload[i]->glarray, - ptr); + /* Then, just point upload[i] at upload[0]'s buffer. */ + upload[i]->stride = interleave; + upload[i]->offset = upload[0]->offset + + ((const unsigned char *)upload[i]->glarray->Ptr - ptr); + upload[i]->bo = upload[0]->bo; + dri_bo_reference(upload[i]->bo); } } else { + /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { - struct brw_vertex_element *input = upload[i]; + copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + } + } +} - input->glarray = copy_array_to_vbo_array(brw, i, - input->glarray, - input->element_size, - input->count); +static void brw_emit_vertices(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = intel_context(ctx); + GLuint tmp = brw->vs.prog_data->inputs_read; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint i; + GLuint nr_enabled = 0; - } + /* Accumulate the list of enabled arrays. */ + while (tmp) { + i = _mesa_ffsll(tmp)-1; + struct brw_vertex_element *input = &brw->vb.inputs[i]; + + tmp &= ~(1<<i); + enabled[nr_enabled++] = input; } - /* XXX: In the rare cases where this happens we fallback all - * the way to software rasterization, although a tnl fallback - * would be sufficient. I don't know of *any* real world - * cases with > 17 vertex attributes enabled, so it probably - * isn't an issue at this point. - */ - if (nr_enabled >= BRW_VEP_MAX) - return GL_FALSE; - /* This still defines a hardware VB for each input, even if they + /* Now emit VB and VEP state packets. + * + * This still defines a hardware VB for each input, even if they * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ + BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + nr_enabled * 4) - 2)); + for (i = 0; i < nr_enabled; i++) { struct brw_vertex_element *input = enabled[i]; - input->vep = &vep.ve[i]; - input->vep->ve0.src_format = get_surface_type(input->glarray->Type, - input->glarray->Size, - input->glarray->Normalized); - input->vep->ve0.valid = 1; - input->vep->ve1.dst_offset = (i) * 4; - input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (input->stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset); + OUT_BATCH(brw->vb.max_index); + OUT_BATCH(0); /* Instance data step rate */ + } + ADVANCE_BATCH(); + + BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + uint32_t format = get_surface_type(input->glarray->Type, + input->glarray->Size, + input->glarray->Normalized); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; switch (input->glarray->Size) { - case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0; - case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; break; } - input->vep->ve0.vertex_buffer_index = i; - input->vep->ve0.src_offset = 0; - - vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB; - vbp.vb[i].vb0.bits.pad = 0; - vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; - vbp.vb[i].vb0.bits.vb_index = i; - vbp.vb[i].offset = (GLuint)input->glarray->Ptr; - vbp.vb[i].buffer = array_buffer(input->glarray); - vbp.vb[i].max_index = max_index; - } - - - - /* Now emit VB and VEP state packets: - */ - vbp.header.bits.length = (1 + nr_enabled * 4) - 2; - vbp.header.bits.opcode = CMD_VERTEX_BUFFER; - - BEGIN_BATCH(vbp.header.bits.length+2, 0); - OUT_BATCH( vbp.header.dword ); - - for (i = 0; i < nr_enabled; i++) { - OUT_BATCH( vbp.vb[i].vb0.dword ); - OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset); - OUT_BATCH( vbp.vb[i].max_index ); - OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } ADVANCE_BATCH(); - - vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; - vep.header.opcode = CMD_VERTEX_ELEMENT; - brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); - - return GL_TRUE; } +const struct brw_tracked_state brw_vertices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, + .cache = 0, + }, + .prepare = brw_prepare_vertices, + .emit = brw_emit_vertices, +}; -static GLuint element_size( GLenum type ) -{ - switch(type) { - case GL_UNSIGNED_INT: return 4; - case GL_UNSIGNED_SHORT: return 2; - case GL_UNSIGNED_BYTE: return 1; - default: assert(0); return 0; - } -} - - - -void brw_upload_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer ) +static void brw_prepare_indices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; - struct gl_buffer_object *bufferobj = index_buffer->obj; - GLuint offset = (GLuint)index_buffer->ptr; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; + dri_bo *bo; + struct gl_buffer_object *bufferobj; + GLuint offset; + + if (index_buffer == NULL) + return; + + ib_size = get_size(index_buffer->type) * index_buffer->count; + bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ @@ -583,23 +523,56 @@ void brw_upload_indices( struct brw_context *brw, /* Get new bufferobj, offset: */ - get_space(brw, ib_size, &bufferobj, &offset); + get_space(brw, ib_size, &bo, &offset); /* Straight upload */ - ctx->Driver.BufferSubData( ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - offset, - ib_size, - index_buffer->ptr, - bufferobj); + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } else { + offset = (GLuint)index_buffer->ptr; + + /* If the index buffer isn't aligned to its element size, we have to + * rebase it into a temporary. + */ + if ((get_size(index_buffer->type) - 1) & offset) { + GLubyte *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + bufferobj); + map += offset; + + get_space(brw, ib_size, &bo, &offset); + + dri_bo_subdata(bo, offset, ib_size, map); + + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + } else { + bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), + INTEL_READ); + dri_bo_reference(bo); + } } + dri_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; +} + +static void brw_emit_indices(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; + + if (index_buffer == NULL) + return; + + ib_size = get_size(index_buffer->type) * index_buffer->count; + /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; - struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj)); memset(&ib, 0, sizeof(ib)); @@ -609,11 +582,25 @@ void brw_upload_indices( struct brw_context *brw, ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, 0); + BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); - OUT_BATCH( bmBufferOffset(intel, buffer) + offset ); - OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size ); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); } } + +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, + .emit = brw_emit_indices, +}; |