From 77e0523fb7769df4bf43747e136b1653b2421b97 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 4 Oct 2007 12:07:25 -0700 Subject: [965] Replace various alignment code with a shared ALIGN() macro. In the process, fix some alignment issues: - Scratch space allocation was aligned into units of 1KB, while the allocation wanted units of bytes, so we never allocated enough space for scratch. - GRF register count was programmed as ALIGN(val - 1, 16) / 16 instead of ALIGN(val, 16) / 16 - 1, which overcounted for val != 16n+1. --- src/mesa/drivers/dri/i915/intel_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_clip_state.c | 3 ++- src/mesa/drivers/dri/i965/brw_curbe.c | 2 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 2 +- src/mesa/drivers/dri/i965/brw_gs_state.c | 3 ++- src/mesa/drivers/dri/i965/brw_sf_state.c | 2 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 2 +- src/mesa/drivers/dri/i965/brw_state_pool.c | 5 ++--- src/mesa/drivers/dri/i965/brw_tex_layout.c | 4 +--- src/mesa/drivers/dri/i965/brw_vs_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_state.c | 4 ++-- src/mesa/drivers/dri/i965/bufmgr_fake.c | 2 +- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++-- src/mesa/drivers/dri/i965/intel_blit.c | 2 +- src/mesa/drivers/dri/i965/intel_context.h | 2 ++ src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 ++-- src/mesa/drivers/dri/i965/intel_pixel_bitmap.c | 9 ++------- src/mesa/drivers/dri/intel/intel_tex_layout.c | 20 +++++++------------- 18 files changed, 33 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index c8298dd9c4..ce9a362944 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -292,6 +292,8 @@ extern char *__progname; #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) + #define INTEL_FIREVERTICES(intel) \ do { \ if 
((intel)->prim.flush) \ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index ae46d7a86e..ba2f0edf51 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,7 +43,8 @@ static void upload_clip_unit( struct brw_context *brw ) memset(&clip, 0, sizeof(clip)); /* CACHE_NEW_CLIP_PROG */ - clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16; + clip.thread0.grf_reg_count = + ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1; clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index d3c88c1dca..fa4ea42aa6 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -304,7 +304,7 @@ static void upload_constant_buffer(struct brw_context *brw) if (!brw_pool_alloc(pool, bufsz, - 6, + 1 << 6, &brw->curbe.gs_offset)) { _mesa_printf("out of GS memory for curbe\n"); assert(0); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 89cd063d46..b7795703fd 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -290,7 +290,7 @@ static void get_space( struct brw_context *brw, struct gl_buffer_object **vbo_return, GLuint *offset_return ) { - size = (size + 63) & ~63; + size = ALIGN(size, 64); if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE) wrap_buffers(brw, size); diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 5826c01d4f..5db4dd4603 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -46,7 +46,8 @@ static void upload_gs_unit( struct 
brw_context *brw ) /* CACHE_NEW_GS_PROG */ if (brw->gs.prog_active) { - gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16; + gs.thread0.grf_reg_count = + ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1; gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 236c6fd42a..2257916aae 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -118,7 +118,7 @@ static void upload_sf_unit( struct brw_context *brw ) memset(&sf, 0, sizeof(sf)); /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16; + sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 98d765ac0e..0e73ff8390 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -148,7 +148,7 @@ GLuint brw_upload_cache( struct brw_cache *cache, GLuint hash = hash_key(key, key_size); void *tmp = _mesa_malloc(key_size + cache->aux_size); - if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) { + if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { /* Should not be possible: */ _mesa_printf("brw_pool_alloc failed\n"); diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c index 708ae857ab..eda92a2fa8 100644 --- a/src/mesa/drivers/dri/i965/brw_state_pool.c +++ b/src/mesa/drivers/dri/i965/brw_state_pool.c @@ -41,10 +41,9 @@ GLboolean brw_pool_alloc( struct brw_mem_pool *pool, GLuint align, GLuint *offset_return) { - GLuint align_mask = (1<<align)-1; - GLuint fixup = ((pool->offset + align_mask) & 
~align_mask) - pool->offset; + GLuint fixup = ALIGN(pool->offset, align) - pool->offset; - size = (size + 3) & ~3; + size = ALIGN(size, 4); if (pool->offset + fixup + size >= pool->size) { _mesa_printf("%s failed\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 2094a1c8ad..e306c9cf10 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -37,8 +37,6 @@ #include "intel_tex_layout.h" #include "macros.h" -#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) - GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) { /* XXX: these vary depending on image format: @@ -64,7 +62,7 @@ GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; + mt->pitch = ALIGN(mt->width0 * mt->cpp, 4) / mt->cpp; pack_y_pitch = ALIGN(mt->height0, align_h); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c225bf8f5c..f561979138 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw ) /* CACHE_NEW_VS_PROG */ vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16; + vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs.thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 5b4f2abd0e..351de6d90e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ 
-62,7 +62,7 @@ static void upload_wm_unit(struct brw_context *brw ) memset(&wm, 0, sizeof(wm)); /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16; + wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; @@ -71,7 +71,7 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.max_threads = max_threads; if (brw->wm.prog_data->total_scratch) { - GLuint per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024; + GLuint per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024); GLuint total = per_thread * (max_threads + 1); /* Scratch space -- just have to make sure there is sufficient diff --git a/src/mesa/drivers/dri/i965/bufmgr_fake.c b/src/mesa/drivers/dri/i965/bufmgr_fake.c index a85121122f..65760c40d4 100644 --- a/src/mesa/drivers/dri/i965/bufmgr_fake.c +++ b/src/mesa/drivers/dri/i965/bufmgr_fake.c @@ -168,7 +168,7 @@ static GLboolean alloc_from_pool( struct intel_context *intel, if (!block) return GL_FALSE; - sz = (buf->size + align-1) & ~(align-1); + sz = ALIGN(buf->size, align); block->mem = mmAllocMem(pool->heap, sz, diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index fb58c0e708..7a6293b557 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -37,7 +37,7 @@ static void intel_batchbuffer_reset( struct intel_batchbuffer *batch ) assert(batch->map == NULL); batch->offset = (unsigned long)batch->ptr; - batch->offset = (batch->offset + 63) & ~63; + batch->offset = ALIGN(batch->offset, 64); batch->ptr = (unsigned char *) batch->offset; if (BATCH_SZ - batch->offset < BATCH_REFILL) { @@ -208,7 +208,7 @@ void intel_batchbuffer_align( struct intel_batchbuffer *batch, 
GLuint sz ) { unsigned long ptr = (unsigned long) batch->ptr; - unsigned long aptr = (ptr + align) & ~((unsigned long)align-1); + unsigned long aptr = ALIGN(ptr, align); GLuint fixup = aptr - ptr; if (intel_batchbuffer_space(batch) < fixup + sz) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 7a9e1a2a3f..d1c1c8afb6 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -533,7 +533,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLenum logic_op) { struct xy_text_immediate_blit text; - int dwords = ((src_size + 7) & ~7) / 4; + int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13; assert( logic_op - GL_CLEAR >= 0 ); diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h index f63c2f613d..65898caaa7 100644 --- a/src/mesa/drivers/dri/i965/intel_context.h +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -252,6 +252,8 @@ void UNLOCK_HARDWARE( struct intel_context *intel ); #define SUBPIXEL_X 0.125 #define SUBPIXEL_Y 0.125 +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) + /* ================================================================ * Color packing: */ diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 0fb33e27f4..268a982a97 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -233,8 +233,8 @@ GLboolean intel_miptree_image_data(struct intel_context *intel, if (dst->compressed) { alignment = intel_compressed_alignment(dst->internal_format); - src_row_pitch = ((src_row_pitch + alignment - 1) & ~(alignment - 1)); - width = ((width + alignment - 1) & ~(alignment - 1)); + src_row_pitch = ALIGN(src_row_pitch, alignment); + width = ALIGN(width, alignment); height = (height + 3) / 4; } diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c 
b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index 79c1fee9c0..3777422619 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -91,11 +91,6 @@ static void set_bit( GLubyte *dest, dest[bit/8] |= 1 << (bit % 8); } -static int align(int x, int align) -{ - return (x + align - 1) & ~(align - 1); -} - /* Extract a rectangle's worth of data from the bitmap. Called * per-cliprect. */ @@ -147,7 +142,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, } if (row_align) - bit = (bit + row_align - 1) & ~(row_align - 1); + bit = ALIGN(bit, row_align); } return count; @@ -268,7 +263,7 @@ do_blit_bitmap( GLcontext *ctx, for (px = 0; px < box_w; px += DX) { int h = MIN2(DY, box_h - py); int w = MIN2(DX, box_w - px); - GLuint sz = align(align(w,8) * h, 64)/8; + GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY; diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index fdecd3e186..e3c6e1c17c 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -34,12 +34,6 @@ #include "intel_tex_layout.h" #include "macros.h" - -static int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - GLuint intel_compressed_alignment(GLenum internalFormat) { GLuint alignment = 4; @@ -70,7 +64,7 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt ) if (mt->compressed) { align_w = intel_compressed_alignment(mt->internal_format); - mt->pitch = align(mt->width0, align_w); + mt->pitch = ALIGN(mt->width0, align_w); } /* May need to adjust pitch to accomodate the placement of @@ -82,10 +76,10 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt ) GLuint mip1_width; if (mt->compressed) { - mip1_width = align(minify(mt->width0), align_w) - + align(minify(minify(mt->width0)), align_w); + mip1_width = 
ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); } else { - mip1_width = align(minify(mt->width0), align_w) + mip1_width = ALIGN(minify(mt->width0), align_w) + minify(minify(mt->width0)); } @@ -97,7 +91,7 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt ) /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - mt->pitch = align(mt->pitch * mt->cpp, 4) / mt->cpp; + mt->pitch = ALIGN(mt->pitch * mt->cpp, 4) / mt->cpp; mt->total_height = 0; for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { @@ -109,7 +103,7 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt ) if (mt->compressed) img_height = MAX2(1, height/4); else - img_height = align(height, align_h); + img_height = ALIGN(height, align_h); /* Because the images are packed better, the final offset @@ -120,7 +114,7 @@ void i945_miptree_layout_2d( struct intel_mipmap_tree *mt ) /* Layout_below: step right after second mipmap. */ if (level == mt->first_level + 1) { - x += align(width, align_w); + x += ALIGN(width, align_w); } else { y += img_height; -- cgit v1.2.3