diff options
Diffstat (limited to 'src/gallium')
207 files changed, 7875 insertions, 5400 deletions
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 5cafe8c3f0..8f7d3b7100 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -552,7 +552,7 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; @@ -919,7 +919,7 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_SHL: break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: break; case TGSI_OPCODE_AND: break; diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index a9375abd21..ba6f7b15f9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -80,7 +80,7 @@ struct fenced_buffer_list */ struct fenced_buffer { - /* + /* * Immutable members. */ @@ -126,8 +126,8 @@ fenced_buffer(struct pb_buffer *buf) /** * Add the buffer to the fenced list. * - * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this - * order before calling this function. + * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this + * order, before calling this function. * * Reference count should be incremented before calling this function. */ @@ -191,7 +191,7 @@ fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list, * Wait for the fence to expire, and remove it from the fenced list. * * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be - * held -- it will + * held -- it will be acquired internally. 
*/ static INLINE enum pipe_error fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, @@ -207,7 +207,10 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, assert(pipe_is_referenced(&fenced_buf->base.base.reference)); assert(fenced_buf->fence); - /* Acquire the global lock */ + /* + * Acquire the global lock. Must release buffer mutex first to preserve + * lock order. + */ pipe_mutex_unlock(fenced_buf->mutex); pipe_mutex_lock(fenced_list->mutex); pipe_mutex_lock(fenced_buf->mutex); @@ -217,7 +220,7 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, /* Remove from the fenced list */ /* TODO: remove consequents */ fenced_buffer_remove_locked(fenced_list, fenced_buf); - + p_atomic_dec(&fenced_buf->base.base.reference.count); assert(pipe_is_referenced(&fenced_buf->base.base.reference)); @@ -238,7 +241,7 @@ fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list, */ static void fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, - int wait) + int wait) { struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; @@ -274,7 +277,6 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, pb_buf = &fenced_buf->base; pb_reference(&pb_buf, NULL); - curr = next; next = curr->next; @@ -329,7 +331,7 @@ fenced_buffer_map(struct pb_buffer *buf, if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) && ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { /* Don't wait for the GPU to finish writing */ - goto finish; + goto done; } /* Wait for the GPU to finish writing */ @@ -350,7 +352,7 @@ fenced_buffer_map(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; } -finish: +done: pipe_mutex_unlock(fenced_buf->mutex); return map; @@ -391,7 +393,7 @@ fenced_buffer_validate(struct pb_buffer *buf, fenced_buf->vl = NULL; fenced_buf->validation_flags = 0; ret = PIPE_OK; - goto finish; + goto done; } assert(flags & 
PIPE_BUFFER_USAGE_GPU_READ_WRITE); @@ -401,7 +403,7 @@ fenced_buffer_validate(struct pb_buffer *buf, /* Buffer cannot be validated in two different lists */ if(fenced_buf->vl && fenced_buf->vl != vl) { ret = PIPE_ERROR_RETRY; - goto finish; + goto done; } #if 0 @@ -409,7 +411,7 @@ fenced_buffer_validate(struct pb_buffer *buf, if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* TODO: wait for the thread that mapped the buffer to unmap it */ ret = PIPE_ERROR_RETRY; - goto finish; + goto done; } /* Final sanity checking */ assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); @@ -420,17 +422,17 @@ fenced_buffer_validate(struct pb_buffer *buf, (fenced_buf->validation_flags & flags) == flags) { /* Nothing to do -- buffer already validated */ ret = PIPE_OK; - goto finish; + goto done; } ret = pb_validate(fenced_buf->buffer, vl, flags); if (ret != PIPE_OK) - goto finish; + goto done; fenced_buf->vl = vl; fenced_buf->validation_flags |= flags; -finish: +done: pipe_mutex_unlock(fenced_buf->mutex); return ret; diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 01811d5011..ffed768f97 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -41,6 +41,12 @@ #define MAP_ANONYMOUS MAP_ANON #endif +#if defined(PIPE_OS_WINDOWS) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include <windows.h> +#endif #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) @@ -118,7 +124,29 @@ rtasm_exec_free(void *addr) } -#else /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */ +#elif defined(PIPE_OS_WINDOWS) + + +/* + * Avoid Data Execution Prevention. + */ + +void * +rtasm_exec_malloc(size_t size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); +} + + +void +rtasm_exec_free(void *addr) +{ + VirtualFree(addr, 0, MEM_RELEASE); +} + + +#else + /* * Just use regular memory. 
@@ -138,4 +166,4 @@ rtasm_exec_free(void *addr) } -#endif /* PIPE_OS_LINUX || PIPE_OS_BSD || PIPE_OS_SOLARIS */ +#endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 2c65ff16d8..e2e5394f86 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -128,7 +128,9 @@ static const char *semantic_names[] = static const char *immediate_type_names[] = { - "FLT32" + "FLT32", + "UINT32", + "INT32" }; static const char *swizzle_names[] = @@ -412,6 +414,12 @@ iter_immediate( case TGSI_IMM_FLOAT32: FLT( imm->u[i].Float ); break; + case TGSI_IMM_UINT32: + UID(imm->u[i].Uint); + break; + case TGSI_IMM_INT32: + SID(imm->u[i].Int); + break; default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ba89f2fbc3..2bcb33392a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,6 +61,7 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 #define TILE_TOP_LEFT 0 @@ -67,11 +69,329 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + +static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union 
tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? 
src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; + dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; + dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; + dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; + dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; + dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; + dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 
0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] > src[1].f[3] ? 
1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; + dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; + dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; + dst->f[3] = src[0].f[3] != src[1].f[3] ? 
1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -123,23 +443,19 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; -#ifdef DEBUG -static void -check_inf_or_nan(const union tgsi_exec_channel *chan) -{ - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); -} -#endif +#define CHECK_INF_OR_NAN(chan) do {\ + assert(!util_is_inf_or_nan((chan)->f[0]));\ + assert(!util_is_inf_or_nan((chan)->f[1]));\ + assert(!util_is_inf_or_nan((chan)->f[2]));\ + assert(!util_is_inf_or_nan((chan)->f[3]));\ + } while (0) #ifdef DEBUG @@ -422,18 +738,6 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void micro_add( union tgsi_exec_channel *dst, @@ -446,76 +750,6 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel 
*src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} -#endif - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; -} - static void micro_div( union tgsi_exec_channel *dst, @@ -536,99 +770,6 @@ micro_div( } } -#if 0 -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} -#endif - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - 
const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -#if 0 -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ -#if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); -#else - -#if DEBUG - /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ - uint i; - union tgsi_exec_channel clamped; - - for (i = 0; i < 4; i++) { - if (src->f[i] > 127.99999f) { - clamped.f[i] = 127.99999f; - } else if (src->f[i] < -126.99999f) { - clamped.f[i] = -126.99999f; - } else { - clamped.f[i] = src->f[i]; - } - } - src = &clamped; -#endif - - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -#endif -} - -#if 0 -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} -#endif - static void micro_float_clamp(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -656,71 +797,6 @@ micro_float_clamp(union tgsi_exec_channel *dst, } static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( 
src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -734,38 +810,6 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif - -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif - static void micro_max( union tgsi_exec_channel *dst, @@ -778,34 +822,6 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? 
src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif - static void micro_min( union tgsi_exec_channel *dst, @@ -818,48 +834,6 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif - -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; -} -#endif - -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif - static void micro_mul( union tgsi_exec_channel *dst, @@ -874,20 +848,6 @@ micro_mul( #if 0 static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif - -#if 0 -static void micro_imul64( union tgsi_exec_channel *dst0, union tgsi_exec_channel *dst1, @@ -951,42 +911,6 @@ micro_neg( dst->f[3] = -src->f[3]; } -#if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} -#endif - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - static void micro_pow( union tgsi_exec_channel *dst, @@ -1007,88 +931,6 @@ micro_pow( } static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - 
-static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} -#endif - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { @@ -1110,31 +952,6 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } -#if 
0 -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} -#endif - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - static void fetch_src_file_channel( const struct tgsi_exec_machine *mach, @@ -1233,11 +1050,11 @@ fetch_src_file_channel( } static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { union tgsi_exec_channel index; uint swizzle; @@ -1286,10 +1103,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. 
@@ -1366,10 +1183,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1394,32 +1211,30 @@ fetch_source( &index, chan ); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } - case TGSI_UTIL_SIGN_KEEP: - break; + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } } } static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { uint i; union tgsi_exec_channel null; @@ -1428,9 +1243,9 @@ store_dest( int offset = 0; /* indirection offset */ int index; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + if (dst_datatype == TGSI_EXEC_DATA_FLOAT) { + CHECK_INF_OR_NAN(chan); + } /* There is an extra source register that indirectly subscripts * a register file. 
The direct index now becomes an offset @@ -1465,7 +1280,7 @@ store_dest( &indir_index ); /* save indirection offset */ - offset = (int) indir_index.f[0]; + offset = indir_index.i[0]; } switch (reg->Register.File) { @@ -1595,10 +1410,10 @@ store_dest( } #define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) #define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) /** @@ -1694,7 +1509,8 @@ fetch_texel( struct tgsi_sampler *sampler, const union tgsi_exec_channel *s, const union tgsi_exec_channel *t, const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ + const union tgsi_exec_channel *c0, + enum tgsi_sampler_control control, union tgsi_exec_channel *r, union tgsi_exec_channel *g, union tgsi_exec_channel *b, @@ -1703,7 +1519,7 @@ fetch_texel( struct tgsi_sampler *sampler, uint j; float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); for (j = 0; j < 4; j++) { r->f[j] = rgba[0][j]; @@ -1714,102 +1530,95 @@ fetch_texel( struct tgsi_sampler *sampler, } +#define TEX_MODIFIER_NONE 0 +#define TEX_MODIFIER_PROJECTED 1 +#define TEX_MODIFIER_LOD_BIAS 2 +#define TEX_MODIFIER_EXPLICIT_LOD 3 + + static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) + uint modifier) { const uint unit = inst->Src[1].Register.Index; union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; uint chan_index; - float lodBias; - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + if (modifier != TEX_MODIFIER_NONE) { + FETCH(&r[3], 0, CHAN_W); + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + 
+ if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - FETCH(&r[0], 0, CHAN_X); - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + 
micro_div(&r[2], &r[2], &r[3]); } - else - lodBias = 0.0; fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], lod, + control, &r[0], &r[1], &r[2], &r[3]); break; default: - assert (0); + assert(0); } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); } } @@ -1832,8 +1641,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: @@ -1846,8 +1656,9 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; case TGSI_TEXTURE_3D: @@ -1858,7 +1669,8 @@ exec_txd(struct tgsi_exec_machine *mach, FETCH(&r[2], 0, CHAN_Z); fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1955,7 +1767,7 @@ exec_declaration(struct tgsi_exec_machine *mach, if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { assert(decl->Semantic.Index == 0); assert(first == last); - assert(mask = TGSI_WRITEMASK_XYZW); + assert(mask == TGSI_WRITEMASK_XYZW); mach->Inputs[first] = mach->QuadPos; } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { @@ -2001,6 +1813,585 @@ exec_declaration(struct tgsi_exec_machine *mach, } } +typedef void (* micro_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); + +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + 
const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_vector_trinary(struct 
tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + +static void +exec_dp3(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp4(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_W; chan++) { + fetch_source(mach, &arg[0], 
&inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2a(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dph(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], arg); + + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + 
micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_dp2(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], arg); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); + } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void 
+exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; + } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_default(struct tgsi_exec_machine *mach) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; + + UPDATE_EXEC_MASK(mach); +} + +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} + +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] << src[1].u[0]; + dst->u[1] = src[0].u[1] << src[1].u[1]; + dst->u[2] = src[0].u[2] << src[1].u[2]; + dst->u[3] = src[0].u[3] << src[1].u[3]; +} + +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] & src[1].u[0]; + dst->u[1] = src[0].u[1] & src[1].u[1]; + dst->u[2] = 
src[0].u[2] & src[1].u[2]; + dst->u[3] = src[0].u[3] & src[1].u[3]; +} + +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] | src[1].u[0]; + dst->u[1] = src[0].u[1] | src[1].u[1]; + dst->u[2] = src[0].u[2] | src[1].u[2]; + dst->u[3] = src[0].u[3] | src[1].u[3]; +} + +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] ^ src[1].u[0]; + dst->u[1] = src[0].u[1] ^ src[1].u[1]; + dst->u[2] = src[0].u[2] ^ src[1].u[2]; + dst->u[3] = src[0].u[3] ^ src[1].u[3]; +} + +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} + +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] / src[1].i[0]; + dst->i[1] = src[0].i[1] / src[1].i[1]; + dst->i[2] = src[0].i[2] / src[1].i[2]; + dst->i[3] = src[0].i[3] / src[1].i[3]; +} + +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; + dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; + dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; + dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; +} + +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >= src[1].i[0] ? 
-1 : 0; + dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; +} + +static void +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] >> src[1].i[0]; + dst->i[1] = src[0].i[1] >> src[1].i[1]; + dst->i[2] = src[0].i[2] >> src[1].i[2]; + dst->i[3] = src[0].i[3] >> src[1].i[3]; +} + +static void +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; + dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; + dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; + dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0; +} + +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} + +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} + +static void +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] + src[1].u[0]; + dst->u[1] = src[0].u[1] + src[1].u[1]; + dst->u[2] = src[0].u[2] + src[1].u[2]; + dst->u[3] = src[0].u[3] + src[1].u[3]; +} + +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] / src[1].u[0]; + dst->u[1] = src[0].u[1] / src[1].u[1]; + dst->u[2] = src[0].u[2] / src[1].u[2]; + dst->u[3] = src[0].u[3] / src[1].u[3]; +} + +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; + dst->u[3] = 
src[0].u[3] * src[1].u[3] + src[2].u[3]; +} + +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; + dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; + dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2]; + dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; +} + +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] % src[1].u[0]; + dst->u[1] = src[0].u[1] % src[1].u[1]; + dst->u[2] = src[0].u[2] % src[1].u[2]; + dst->u[3] = src[0].u[3] % src[1].u[3]; +} + +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] * src[1].u[0]; + dst->u[1] = src[0].u[1] * src[1].u[1]; + dst->u[2] = src[0].u[2] * src[1].u[2]; + dst->u[3] = src[0].u[3] * src[1].u[3]; +} + +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; +} + +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] >= src[1].u[3] ? 
~0 : 0; +} + +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] >> src[1].u[0]; + dst->u[1] = src[0].u[1] >> src[1].u[1]; + dst->u[2] = src[0].u[2] >> src[1].u[2]; + dst->u[3] = src[0].u[3] >> src[1].u[3]; +} + +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0; + dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; + dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; + dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; +} + static void exec_instruction( struct tgsi_exec_machine *mach, @@ -2015,23 +2406,11 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: @@ -2068,23 +2447,11 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + 
exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -2151,54 +2518,11 @@ exec_instruction( break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp3(mach, inst); break; - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + case TGSI_OPCODE_DP4: + exec_dp4(mach, inst); break; case TGSI_OPCODE_DST: @@ -2255,41 +2579,15 @@ exec_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - 
micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: @@ -2304,17 +2602,7 @@ exec_instruction( break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add(&d[chan_index], &r[0], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: @@ -2330,31 +2618,11 @@ exec_instruction( break; case TGSI_OPCODE_DP2A: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( 
&r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[2], 2, CHAN_X ); - micro_add( &r[0], &r[0], &r[2] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2a(mach, inst); break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: @@ -2370,33 +2638,20 @@ exec_instruction( } break; + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - micro_exp2( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2449,15 +2704,9 @@ exec_instruction( } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - micro_abs(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - 
STORE(&d[chan_index], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2469,60 +2718,19 @@ exec_instruction( break; case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dph(mach, inst); break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2599,14 +2807,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], 
&r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2616,44 +2817,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2666,14 
+2842,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_TXD: @@ -2689,14 +2865,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, TRUE); + exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_UP2H: @@ -2758,6 +2934,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2777,6 +2957,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2784,12 +2966,17 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ @@ -2822,6 +3009,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2832,14 +3025,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: @@ -2946,18 +3132,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( 
&r[0], 0, chan_index ); - } + exec_dp2(mach, inst); break; case TGSI_OPCODE_IF: @@ -3023,87 +3198,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - 
STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3111,14 +3230,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3167,11 +3279,15 @@ exec_instruction( case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + 
mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: @@ -3218,6 +3334,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3241,15 +3359,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3280,6 +3397,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -3310,11 +3433,116 @@ exec_instruction( UPDATE_EXEC_MASK(mach); break; + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + 
exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, 
inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } + #define DEBUG_EXECUTION 0 @@ -3334,9 +3562,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3393,11 +3625,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3411,11 +3643,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -3437,6 +3669,8 @@ 
tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index afaf5c39c4..59e3b445cc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ extern "C" { #endif + #define MAX_LABELS (4 * 1024) /**< basically, max instructions */ #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ + /** * Registers may be treated as float, signed int or unsigned int. */ @@ -69,6 +72,11 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; +enum tgsi_sampler_control { + tgsi_sampler_lod_bias, + tgsi_sampler_lod_explicit +}; + /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. @@ -80,7 +88,8 @@ struct tgsi_sampler const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); }; @@ -179,6 +188,7 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_COND_NESTING 32 #define TGSI_EXEC_MAX_LOOP_NESTING 32 +#define TGSI_EXEC_MAX_SWITCH_NESTING 32 #define TGSI_EXEC_MAX_CALL_NESTING 32 /* The maximum number of input attributes per vertex. 
For 2D @@ -206,9 +216,29 @@ struct tgsi_call_record uint CondStackTop; uint LoopStackTop; uint ContStackTop; + int SwitchStackTop; + int BreakStackTop; uint ReturnAddr; }; + +/* Switch-case block state. */ +struct tgsi_switch_record { + uint mask; /**< execution mask */ + union tgsi_exec_channel selector; /**< a value case statements are compared to */ + uint defaultMask; /**< non-execute mask for default case */ +}; + + +enum tgsi_break_type { + TGSI_EXEC_BREAK_INSIDE_LOOP, + TGSI_EXEC_BREAK_INSIDE_SWITCH +}; + + +#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING) + + /** * Run-time virtual machine state for executing TGSI shader. */ @@ -251,6 +281,12 @@ struct tgsi_exec_machine uint FuncMask; /**< For function calls */ uint ExecMask; /**< = CondMask & LoopMask */ + /* Current switch-case state. */ + struct tgsi_switch_record Switch; + + /* Current break type. */ + enum tgsi_break_type BreakType; + /** Condition mask stack (for nested conditionals) */ uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; int CondStackTop; @@ -271,6 +307,13 @@ struct tgsi_exec_machine uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; + /** Switch case stack */ + struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING]; + int SwitchStackTop; + + enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK]; + int BreakStackTop; + /** Function execution mask stack (for executing subroutine code) */ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; int FuncStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index be375cabb8..de0e09cdba 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -119,7 +119,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, "NOT", TGSI_OPCODE_NOT }, { 1, 1, 0, 0, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC }, { 1, 2, 0, 0, 0, 0, "SHL", TGSI_OPCODE_SHL }, - { 1, 2, 0, 0, 0, 0, "SHR", 
TGSI_OPCODE_SHR }, + { 0, 0, 0, 0, 0, 0, "", 88 }, /* removed */ { 1, 2, 0, 0, 0, 0, "AND", TGSI_OPCODE_AND }, { 1, 2, 0, 0, 0, 0, "OR", TGSI_OPCODE_OR }, { 1, 2, 0, 0, 0, 0, "MOD", TGSI_OPCODE_MOD }, @@ -149,7 +149,33 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC }, { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL }, { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END }, - { 0, 0, 0, 0, 0, 0, "", 118 } /* removed */ + { 0, 0, 0, 0, 0, 0, "", 118 }, /* removed */ + { 1, 1, 0, 0, 0, 0, "F2I", TGSI_OPCODE_F2I }, + { 1, 2, 0, 0, 0, 0, "IDIV", TGSI_OPCODE_IDIV }, + { 1, 2, 0, 0, 0, 0, "IMAX", TGSI_OPCODE_IMAX }, + { 1, 2, 0, 0, 0, 0, "IMIN", TGSI_OPCODE_IMIN }, + { 1, 1, 0, 0, 0, 0, "INEG", TGSI_OPCODE_INEG }, + { 1, 2, 0, 0, 0, 0, "ISGE", TGSI_OPCODE_ISGE }, + { 1, 2, 0, 0, 0, 0, "ISHR", TGSI_OPCODE_ISHR }, + { 1, 2, 0, 0, 0, 0, "ISLT", TGSI_OPCODE_ISLT }, + { 1, 1, 0, 0, 0, 0, "F2U", TGSI_OPCODE_F2U }, + { 1, 1, 0, 0, 0, 0, "U2F", TGSI_OPCODE_U2F }, + { 1, 2, 0, 0, 0, 0, "UADD", TGSI_OPCODE_UADD }, + { 1, 2, 0, 0, 0, 0, "UDIV", TGSI_OPCODE_UDIV }, + { 1, 3, 0, 0, 0, 0, "UMAD", TGSI_OPCODE_UMAD }, + { 1, 2, 0, 0, 0, 0, "UMAX", TGSI_OPCODE_UMAX }, + { 1, 2, 0, 0, 0, 0, "UMIN", TGSI_OPCODE_UMIN }, + { 1, 2, 0, 0, 0, 0, "UMOD", TGSI_OPCODE_UMOD }, + { 1, 2, 0, 0, 0, 0, "UMUL", TGSI_OPCODE_UMUL }, + { 1, 2, 0, 0, 0, 0, "USEQ", TGSI_OPCODE_USEQ }, + { 1, 2, 0, 0, 0, 0, "USGE", TGSI_OPCODE_USGE }, + { 1, 2, 0, 0, 0, 0, "USHR", TGSI_OPCODE_USHR }, + { 1, 2, 0, 0, 0, 0, "USLT", TGSI_OPCODE_USLT }, + { 1, 2, 0, 0, 0, 0, "USNE", TGSI_OPCODE_USNE }, + { 0, 1, 0, 0, 0, 0, "SWITCH", TGSI_OPCODE_SWITCH }, + { 0, 1, 0, 0, 0, 0, "CASE", TGSI_OPCODE_CASE }, + { 0, 0, 0, 0, 0, 0, "DEFAULT", TGSI_OPCODE_DEFAULT }, + { 0, 0, 0, 0, 0, 0, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH } }; const struct tgsi_opcode_info * diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index 
b34263da48..e4af15c156 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -124,7 +124,6 @@ OP11(I2F) OP11(NOT) OP11(TRUNC) OP12(SHL) -OP12(SHR) OP12(AND) OP12(OR) OP12(MOD) @@ -146,6 +145,28 @@ OP01(IFC) OP01(BREAKC) OP01(KIL) OP00(END) +OP11(F2I) +OP12(IDIV) +OP12(IMAX) +OP12(IMIN) +OP11(INEG) +OP12(ISGE) +OP12(ISHR) +OP12(ISLT) +OP11(F2U) +OP11(U2F) +OP12(UADD) +OP12(UDIV) +OP13(UMAD) +OP12(UMAX) +OP12(UMIN) +OP12(UMOD) +OP12(UMUL) +OP12(USEQ) +OP12(USGE) +OP12(USHR) +OP12(USLT) +OP12(USNE) #undef OP00 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index fa65ecb997..8c7062d850 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -119,17 +119,29 @@ tgsi_parse_token( case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + uint imm_count; memset(imm, 0, sizeof *imm); copy_token(&imm->Immediate, &token); + imm_count = imm->Immediate.NrTokens - 1; + switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - { - uint imm_count = imm->Immediate.NrTokens - 1; - for (i = 0; i < imm_count; i++) { - next_token(ctx, &imm->u[i]); - } + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Float); + } + break; + + case TGSI_IMM_UINT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Uint); + } + break; + + case TGSI_IMM_INT32: + for (i = 0; i < imm_count; i++) { + next_token(ctx, &imm->u[i].Int); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 16b8ec6051..7f1c8e5dd6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -195,7 +195,7 @@ is_any_register_declared( struct cso_hash_iter iter = cso_hash_first_node(ctx->regs_decl); - while (cso_hash_iter_is_null(iter)) { + while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register 
*)cso_hash_iter_data(iter); if (reg->file == file) return TRUE; @@ -247,7 +247,7 @@ check_register_usage( boolean indirect_access ) { if (!check_file_name( ctx, reg->file )) { - free(reg); + FREE(reg); return FALSE; } @@ -261,21 +261,23 @@ check_register_usage( if (!is_ind_register_used(ctx, reg)) cso_hash_insert(ctx->regs_ind_used, reg->file, reg); else - free(reg); + FREE(reg); } else { if (!is_register_declared( ctx, reg )) { - if (reg->dimensions == 2) + if (reg->dimensions == 2) { report_error( ctx, "%s[%d][%d]: Undeclared %s register", file_names[reg->file], reg->indices[0], reg->indices[1], name ); - else + } + else { report_error( ctx, "%s[%d]: Undeclared %s register", file_names[reg->file], reg->indices[0], name ); } + } if (!is_register_used( ctx, reg )) cso_hash_insert(ctx->regs_used, scan_register_key(reg), reg); else - free(reg); + FREE(reg); } return TRUE; } @@ -333,15 +335,15 @@ iter_instruction( fill_scan_register1d(ind_reg, inst->Src[i].Indirect.File, inst->Src[i].Indirect.Index); + if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || + reg->indices[0] != 0) { + report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); + } check_register_usage( ctx, reg, "indirect", FALSE ); - if (!(reg->file == TGSI_FILE_ADDRESS || reg->file == TGSI_FILE_LOOP) || - reg->indices[0] != 0) { - report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]"); - } } } @@ -445,7 +447,9 @@ iter_immediate( /* Check data type validity. 
*/ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32 && + imm->Immediate.DataType != TGSI_IMM_UINT32 && + imm->Immediate.DataType != TGSI_IMM_INT32) { report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -486,7 +490,7 @@ epilog( struct cso_hash_iter iter = cso_hash_first_node(ctx->regs_decl); - while (cso_hash_iter_is_null(iter)) { + while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); if (!is_register_used(ctx, reg) && !is_ind_register_used(ctx, reg)) { report_warning( ctx, "%s[%u]: Register never used", @@ -511,7 +515,8 @@ regs_hash_destroy(struct cso_hash *hash) while (!cso_hash_iter_is_null(iter)) { scan_register *reg = (scan_register *)cso_hash_iter_data(iter); iter = cso_hash_erase(hash, iter); - free(reg); + assert(reg->file < TGSI_FILE_COUNT); + FREE(reg); } cso_hash_delete(hash); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d63c75dafb..a85cc4659e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -1418,13 +1419,13 @@ fetch_texel( struct tgsi_sampler **sampler, sampler, *sampler, store ); - debug_printf("lodbias %f\n", store[12]); - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", + debug_printf("sample %d texcoord %f %f %f lodbias %f\n", j, store[0+j], - store[4+j]); + store[4+j], + store[8 + j], + store[12 + j]); #endif { @@ -1433,7 +1434,8 @@ fetch_texel( struct tgsi_sampler **sampler, &store[0], /* s */ &store[4], /* t */ &store[8], /* r */ - store[12], /* lodbias */ + &store[12], /* lodbias */ + tgsi_sampler_lod_bias, rgba); /* results */ memcpy( store, rgba, 16 * sizeof(float)); @@ -2144,40 +2146,50 @@ emit_instruction( break; case TGSI_OPCODE_XPD: + /* Note: we do all stores after all operands have been fetched + * to avoid src/dst register aliasing issues for an instruction + * such as: XPD TEMP[2].xyz, TEMP[0], TEMP[2]; + */ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); + FETCH( func, *inst, 1, 1, CHAN_Z ); /* xmm[1] = src[1].z */ + FETCH( func, *inst, 3, 0, CHAN_Z ); /* xmm[3] = src[0].z */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_Y ); /* xmm[0] = src[0].y */ + FETCH( func, *inst, 4, 1, CHAN_Y ); /* xmm[4] = src[1].y */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); + emit_MOV( func, 7, 0 ); /* xmm[7] = xmm[0] */ + emit_mul( func, 7, 1 ); /* xmm[7] = xmm[2] * xmm[1] */ + emit_MOV( func, 5, 3 ); /* xmm[5] = xmm[3] */ + emit_mul( func, 5, 4 ); 
/* xmm[5] = xmm[5] * xmm[4] */ + emit_sub( func, 7, 5 ); /* xmm[7] = xmm[2] - xmm[5] */ + /* store xmm[7] in dst.x below */ } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); + FETCH( func, *inst, 2, 1, CHAN_X ); /* xmm[2] = src[1].x */ + FETCH( func, *inst, 5, 0, CHAN_X ); /* xmm[5] = src[0].x */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); + emit_mul( func, 3, 2 ); /* xmm[3] = xmm[3] * xmm[2] */ + emit_mul( func, 1, 5 ); /* xmm[1] = xmm[1] * xmm[5] */ + emit_sub( func, 3, 1 ); /* xmm[3] = xmm[3] - xmm[1] */ + /* store xmm[3] in dst.y below */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); + emit_mul( func, 5, 4 ); /* xmm[5] = xmm[5] * xmm[4] */ + emit_mul( func, 0, 2 ); /* xmm[0] = xmm[0] * xmm[2] */ + emit_sub( func, 5, 0 ); /* xmm[5] = xmm[5] - xmm[0] */ + STORE( func, *inst, 5, 0, CHAN_Z ); /* dst.z = xmm[5] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + STORE( func, *inst, 7, 0, CHAN_X ); /* dst.x = xmm[7] */ + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + STORE( func, *inst, 3, 0, CHAN_Y ); /* dst.y = xmm[3] */ } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { emit_tempf( @@ -2506,7 +2518,7 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( func, inst, TRUE, FALSE ); + return 0; break; case TGSI_OPCODE_TXP: @@ -2578,7 +2590,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: return 0; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 6a0af664dd..e64e2b731d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -101,8 +101,13 @@ struct ureg_program 
unsigned nr_outputs; struct { - float v[4]; + union { + float f[4]; + unsigned u[4]; + int i[4]; + } value; unsigned nr; + unsigned type; } immediate[UREG_MAX_IMMEDIATE]; unsigned nr_immediates; @@ -486,22 +491,22 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg, } - - -static int match_or_expand_immediate( const float *v, - unsigned nr, - float *v2, - unsigned *nr2, - unsigned *swizzle ) +static int +match_or_expand_immediate( const unsigned *v, + unsigned nr, + unsigned *v2, + unsigned *pnr2, + unsigned *swizzle ) { + unsigned nr2 = *pnr2; unsigned i, j; - + *swizzle = 0; for (i = 0; i < nr; i++) { boolean found = FALSE; - for (j = 0; j < *nr2 && !found; j++) { + for (j = 0; j < nr2 && !found; j++) { if (v[i] == v2[j]) { *swizzle |= j << (i * 2); found = TRUE; @@ -509,24 +514,28 @@ static int match_or_expand_immediate( const float *v, } if (!found) { - if (*nr2 >= 4) + if (nr2 >= 4) { return FALSE; + } - v2[*nr2] = v[i]; - *swizzle |= *nr2 << (i * 2); - (*nr2)++; + v2[nr2] = v[i]; + *swizzle |= nr2 << (i * 2); + nr2++; } } + /* Actually expand immediate only when fully succeeded. 
+ */ + *pnr2 = nr2; return TRUE; } - - -struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, - const float *v, - unsigned nr ) +static struct ureg_src +decl_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned nr, + unsigned type ) { unsigned i, j; unsigned swizzle; @@ -536,38 +545,82 @@ struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg, */ for (i = 0; i < ureg->nr_immediates; i++) { - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + if (ureg->immediate[i].type != type) { + continue; + } + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) { i = ureg->nr_immediates++; - if (match_or_expand_immediate( v, - nr, - ureg->immediate[i].v, - &ureg->immediate[i].nr, - &swizzle )) + ureg->immediate[i].type = type; + if (match_or_expand_immediate(v, + nr, + ureg->immediate[i].value.u, + &ureg->immediate[i].nr, + &swizzle)) { goto out; + } } - set_bad( ureg ); + set_bad(ureg); out: /* Make sure that all referenced elements are from this immediate. * Has the effect of making size-one immediates into scalars. 
*/ - for (j = nr; j < 4; j++) + for (j = nr; j < 4; j++) { swizzle |= (swizzle & 0x3) << (j * 2); + } + + return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i), + (swizzle >> 0) & 0x3, + (swizzle >> 2) & 0x3, + (swizzle >> 4) & 0x3, + (swizzle >> 6) & 0x3); +} + + +struct ureg_src +ureg_DECL_immediate( struct ureg_program *ureg, + const float *v, + unsigned nr ) +{ + union { + float f[4]; + unsigned u[4]; + } fu; + unsigned int i; + + for (i = 0; i < nr; i++) { + fu.f[i] = v[i]; + } + + return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32); +} + - return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ), - (swizzle >> 0) & 0x3, - (swizzle >> 2) & 0x3, - (swizzle >> 4) & 0x3, - (swizzle >> 6) & 0x3); +struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *ureg, + const unsigned *v, + unsigned nr ) +{ + return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32); +} + + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *ureg, + const int *v, + unsigned nr ) +{ + return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32); } @@ -955,21 +1008,23 @@ static void emit_decl_range( struct ureg_program *ureg, out[1].decl_range.Last = first + count - 1; } -static void emit_immediate( struct ureg_program *ureg, - const float *v ) +static void +emit_immediate( struct ureg_program *ureg, + const unsigned *v, + unsigned type ) { union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 ); out[0].value = 0; out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE; out[0].imm.NrTokens = 5; - out[0].imm.DataType = TGSI_IMM_FLOAT32; + out[0].imm.DataType = type; out[0].imm.Padding = 0; - out[1].imm_data.Float = v[0]; - out[2].imm_data.Float = v[1]; - out[3].imm_data.Float = v[2]; - out[4].imm_data.Float = v[3]; + out[1].imm_data.Uint = v[0]; + out[2].imm_data.Uint = v[1]; + out[3].imm_data.Uint = v[2]; + out[4].imm_data.Uint = v[3]; } @@ -1055,7 +1110,8 @@ static void emit_decls( struct ureg_program *ureg ) for (i = 0; i < ureg->nr_immediates; i++) { 
emit_immediate( ureg, - ureg->immediate[i].v ); + ureg->immediate[i].value.u, + ureg->immediate[i].type ); } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 7e3e7bcf1d..6f11273320 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -148,6 +148,16 @@ ureg_DECL_immediate( struct ureg_program *, unsigned nr ); struct ureg_src +ureg_DECL_immediate_uint( struct ureg_program *, + const unsigned *v, + unsigned nr ); + +struct ureg_src +ureg_DECL_immediate_int( struct ureg_program *, + const int *v, + unsigned nr ); + +struct ureg_src ureg_DECL_constant( struct ureg_program *, unsigned index ); @@ -221,6 +231,90 @@ ureg_imm1f( struct ureg_program *ureg, return ureg_DECL_immediate( ureg, v, 1 ); } +static INLINE struct ureg_src +ureg_imm4u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c, unsigned d) +{ + unsigned v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_uint( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3u( struct ureg_program *ureg, + unsigned a, unsigned b, + unsigned c) +{ + unsigned v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return ureg_DECL_immediate_uint( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2u( struct ureg_program *ureg, + unsigned a, unsigned b) +{ + unsigned v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_uint( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1u( struct ureg_program *ureg, + unsigned a) +{ + return ureg_DECL_immediate_uint( ureg, &a, 1 ); +} + +static INLINE struct ureg_src +ureg_imm4i( struct ureg_program *ureg, + int a, int b, + int c, int d) +{ + int v[4]; + v[0] = a; + v[1] = b; + v[2] = c; + v[3] = d; + return ureg_DECL_immediate_int( ureg, v, 4 ); +} + +static INLINE struct ureg_src +ureg_imm3i( struct ureg_program *ureg, + int a, int b, + int c) +{ + int v[3]; + v[0] = a; + v[1] = b; + v[2] = c; + return 
ureg_DECL_immediate_int( ureg, v, 3 ); +} + +static INLINE struct ureg_src +ureg_imm2i( struct ureg_program *ureg, + int a, int b) +{ + int v[2]; + v[0] = a; + v[1] = b; + return ureg_DECL_immediate_int( ureg, v, 2 ); +} + +static INLINE struct ureg_src +ureg_imm1i( struct ureg_program *ureg, + int a) +{ + return ureg_DECL_immediate_int( ureg, &a, 1 ); +} + /*********************************************************************** * Functions for patching up labels */ diff --git a/src/gallium/auxiliary/util/u_bitmask.c b/src/gallium/auxiliary/util/u_bitmask.c index 77587c07ec..23c93a3ebc 100644 --- a/src/gallium/auxiliary/util/u_bitmask.c +++ b/src/gallium/auxiliary/util/u_bitmask.c @@ -97,12 +97,12 @@ util_bitmask_resize(struct util_bitmask *bm, if(!minimum_size) return FALSE; - if(bm->size > minimum_size) + if(bm->size >= minimum_size) return TRUE; assert(bm->size % UTIL_BITMASK_BITS_PER_WORD == 0); new_size = bm->size; - while(!(new_size > minimum_size)) { + while(new_size < minimum_size) { new_size *= 2; /* Check integer overflow */ if(new_size < bm->size) @@ -136,7 +136,7 @@ util_bitmask_filled_set(struct util_bitmask *bm, unsigned index) { assert(bm->filled <= bm->size); - assert(index <= bm->size); + assert(index < bm->size); if(index == bm->filled) { ++bm->filled; @@ -149,7 +149,7 @@ util_bitmask_filled_unset(struct util_bitmask *bm, unsigned index) { assert(bm->filled <= bm->size); - assert(index <= bm->size); + assert(index < bm->size); if(index < bm->filled) bm->filled = index; @@ -182,7 +182,7 @@ util_bitmask_add(struct util_bitmask *bm) mask = 1; } found: - + /* grow the bitmask if necessary */ if(!util_bitmask_resize(bm, bm->filled)) return UTIL_BITMASK_INVALID_INDEX; @@ -198,9 +198,9 @@ unsigned util_bitmask_set(struct util_bitmask *bm, unsigned index) { - unsigned word = index / UTIL_BITMASK_BITS_PER_WORD; - unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD; - util_bitmask_word mask = 1 << bit; + unsigned word; + unsigned bit; + util_bitmask_word 
mask; assert(bm); @@ -208,6 +208,10 @@ util_bitmask_set(struct util_bitmask *bm, if(!util_bitmask_resize(bm, index)) return UTIL_BITMASK_INVALID_INDEX; + word = index / UTIL_BITMASK_BITS_PER_WORD; + bit = index % UTIL_BITMASK_BITS_PER_WORD; + mask = 1 << bit; + bm->words[word] |= mask; util_bitmask_filled_set(bm, index); @@ -220,15 +224,19 @@ void util_bitmask_clear(struct util_bitmask *bm, unsigned index) { - unsigned word = index / UTIL_BITMASK_BITS_PER_WORD; - unsigned bit = index % UTIL_BITMASK_BITS_PER_WORD; - util_bitmask_word mask = 1 << bit; + unsigned word; + unsigned bit; + util_bitmask_word mask; assert(bm); if(index >= bm->size) return; + word = index / UTIL_BITMASK_BITS_PER_WORD; + bit = index % UTIL_BITMASK_BITS_PER_WORD; + mask = 1 << bit; + bm->words[word] &= ~mask; util_bitmask_filled_unset(bm, index); @@ -250,7 +258,7 @@ util_bitmask_get(struct util_bitmask *bm, return TRUE; } - if(index > bm->size) + if(index >= bm->size) return FALSE; if(bm->words[word] & mask) { diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 1f794d39a1..46b4706b76 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -48,6 +48,8 @@ #include "util/u_simple_shaders.h" #include "util/u_texture.h" +#define INVALID_PTR ((void*)~0) + struct blitter_context_priv { struct blitter_context blitter; @@ -110,6 +112,11 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) ctx->pipe = pipe; /* init state objects for them to be considered invalid */ + ctx->blitter.saved_blend_state = INVALID_PTR; + ctx->blitter.saved_dsa_state = INVALID_PTR; + ctx->blitter.saved_rs_state = INVALID_PTR; + ctx->blitter.saved_fs = INVALID_PTR; + ctx->blitter.saved_vs = INVALID_PTR; ctx->blitter.saved_fb_state.nr_cbufs = ~0; ctx->blitter.saved_num_textures = ~0; ctx->blitter.saved_num_sampler_states = ~0; @@ -156,6 +163,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) 
rs_state.cull_mode = PIPE_WINDING_NONE; rs_state.bypass_vs_clip_and_viewport = 1; rs_state.gl_rasterization_rules = 1; + rs_state.flatshade = 1; ctx->rs_state = pipe->create_rasterizer_state(pipe, &rs_state); /* fragment shaders are created on-demand */ @@ -234,11 +242,11 @@ void util_blitter_destroy(struct blitter_context *blitter) static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) { /* make sure these CSOs have been saved */ - assert(ctx->blitter.saved_blend_state && - ctx->blitter.saved_dsa_state && - ctx->blitter.saved_rs_state && - ctx->blitter.saved_fs && - ctx->blitter.saved_vs); + assert(ctx->blitter.saved_blend_state != INVALID_PTR && + ctx->blitter.saved_dsa_state != INVALID_PTR && + ctx->blitter.saved_rs_state != INVALID_PTR && + ctx->blitter.saved_fs != INVALID_PTR && + ctx->blitter.saved_vs != INVALID_PTR); } static void blitter_restore_CSOs(struct blitter_context_priv *ctx) @@ -252,11 +260,11 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->bind_fs_state(pipe, ctx->blitter.saved_fs); pipe->bind_vs_state(pipe, ctx->blitter.saved_vs); - ctx->blitter.saved_blend_state = 0; - ctx->blitter.saved_dsa_state = 0; - ctx->blitter.saved_rs_state = 0; - ctx->blitter.saved_fs = 0; - ctx->blitter.saved_vs = 0; + ctx->blitter.saved_blend_state = INVALID_PTR; + ctx->blitter.saved_dsa_state = INVALID_PTR; + ctx->blitter.saved_rs_state = INVALID_PTR; + ctx->blitter.saved_fs = INVALID_PTR; + ctx->blitter.saved_vs = INVALID_PTR; /* restore the state objects which are required to be saved before copy/fill */ @@ -560,45 +568,29 @@ void util_blitter_clear(struct blitter_context *blitter, blitter_restore_CSOs(ctx); } -void util_blitter_copy(struct blitter_context *blitter, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height, - boolean ignore_stencil) +static boolean +is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned 
sy2, + unsigned dx1, unsigned dx2, unsigned dy1, unsigned dy2) +{ + if (sx1 >= dx2 || sx2 <= dx1 || sy1 >= dy2 || sy2 <= dy1) { + return FALSE; + } else { + return TRUE; + } +} + +static void util_blitter_do_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean is_depth) { struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb_state; - boolean is_stencil, is_depth; - unsigned dst_tex_usage; - - /* give up if textures are not set */ - assert(dst->texture && src->texture); - if (!dst->texture || !src->texture) - return; - - is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; - is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; - dst_tex_usage = is_depth || is_stencil ? 
PIPE_TEXTURE_USAGE_DEPTH_STENCIL : - PIPE_TEXTURE_USAGE_RENDER_TARGET; - /* check if we can sample from and render to the surfaces */ - /* (assuming copying a stencil buffer is not possible) */ - if ((!ignore_stencil && is_stencil) || - !screen->is_format_supported(screen, dst->format, dst->texture->target, - dst_tex_usage, 0) || - !screen->is_format_supported(screen, src->format, src->texture->target, - PIPE_TEXTURE_USAGE_SAMPLER, 0)) { - util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, - width, height); - return; - } - - /* check whether the states are properly saved */ - blitter_check_saved_CSOs(ctx); assert(blitter->saved_fb_state.nr_cbufs != ~0); assert(blitter->saved_num_textures != ~0); assert(blitter->saved_num_sampler_states != ~0); @@ -656,6 +648,108 @@ void util_blitter_copy(struct blitter_context *blitter, blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0); blitter_draw_quad(ctx); + +} + +static void util_blitter_overlap_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + + struct pipe_texture texTemp; + struct pipe_texture *texture; + struct pipe_surface *tex_surf; + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = dst->texture->format; /* XXX verify supported by driver! 
*/ + texTemp.last_level = 0; + texTemp.width0 = width; + texTemp.height0 = height; + texTemp.depth0 = 1; + + texture = screen->texture_create(screen, &texTemp); + if (!texture) + return; + + tex_surf = screen->get_tex_surface(screen, texture, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + /* blit from the src to the temp */ + util_blitter_do_copy(blitter, tex_surf, 0, 0, + src, srcx, srcy, + width, height, + FALSE); + util_blitter_do_copy(blitter, dst, dstx, dsty, + tex_surf, 0, 0, + width, height, + FALSE); + pipe_surface_reference(&tex_surf, NULL); + pipe_texture_reference(&texture, NULL); + blitter_restore_CSOs(ctx); +} + +void util_blitter_copy(struct blitter_context *blitter, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height, + boolean ignore_stencil) +{ + struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter; + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + boolean is_stencil, is_depth; + unsigned dst_tex_usage; + + /* give up if textures are not set */ + assert(dst->texture && src->texture); + if (!dst->texture || !src->texture) + return; + + if (dst->texture == src->texture) { + if (is_overlap(srcx, srcx + width, srcy, srcy + height, + dstx, dstx + width, dsty, dsty + height)) { + util_blitter_overlap_copy(blitter, dst, dstx, dsty, src, srcx, srcy, + width, height); + return; + } + } + + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0; + dst_tex_usage = is_depth || is_stencil ? 
PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + /* check if we can sample from and render to the surfaces */ + /* (assuming copying a stencil buffer is not possible) */ + if ((!ignore_stencil && is_stencil) || + !screen->is_format_supported(screen, dst->format, dst->texture->target, + dst_tex_usage, 0) || + !screen->is_format_supported(screen, src->format, src->texture->target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + util_surface_copy(pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, + width, height); + return; + } + + /* check whether the states are properly saved */ + blitter_check_saved_CSOs(ctx); + util_blitter_do_copy(blitter, + dst, dstx, dsty, + src, srcx, srcy, + width, height, is_depth); blitter_restore_CSOs(ctx); } diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c index 09866880ae..61624d05c0 100644 --- a/src/gallium/auxiliary/util/u_debug_dump.c +++ b/src/gallium/auxiliary/util/u_debug_dump.c @@ -255,15 +255,13 @@ DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter) static const char * debug_dump_tex_filter_names[] = { "PIPE_TEX_FILTER_NEAREST", - "PIPE_TEX_FILTER_LINEAR", - "PIPE_TEX_FILTER_ANISO" + "PIPE_TEX_FILTER_LINEAR" }; static const char * debug_dump_tex_filter_short_names[] = { "nearest", - "linear", - "aniso" + "linear" }; DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter) diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c index 7623cb9398..d6484f4ad5 100644 --- a/src/gallium/auxiliary/util/u_debug_memory.c +++ b/src/gallium/auxiliary/util/u_debug_memory.c @@ -297,9 +297,9 @@ debug_memory_end(unsigned long start_no) if((start_no <= hdr->no && hdr->no < last_no) || (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) { - debug_printf("%s:%u:%s: %u bytes at %p not freed\n", + debug_printf("%s:%u:%s: %lu bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, - hdr->size, ptr); + (unsigned long) hdr->size, ptr); #if 
DEBUG_MEMORY_STACK debug_backtrace_dump(hdr->backtrace, DEBUG_MEMORY_STACK); #endif @@ -315,8 +315,8 @@ debug_memory_end(unsigned long start_no) } if(total_size) { - debug_printf("Total of %u KB of system memory apparently leaked\n", - (total_size + 1023)/1024); + debug_printf("Total of %lu KB of system memory apparently leaked\n", + (unsigned long) (total_size + 1023)/1024); } else { debug_printf("No memory leaks detected.\n"); diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv index 866b18ff16..9f16b42944 100644 --- a/src/gallium/auxiliary/util/u_format.csv +++ b/src/gallium/auxiliary/util/u_format.csv @@ -76,9 +76,9 @@ PIPE_FORMAT_R8G8_SNORM , array , 1, 1, sn8 , sn8 , , , xy01, PIPE_FORMAT_R8G8B8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , , xyz1, rgb PIPE_FORMAT_R8G8B8A8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyzw, rgb PIPE_FORMAT_R8G8B8X8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , xyz1, rgb -PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , zyx1, rgb -PIPE_FORMAT_A8B8G8R8_SNORM , arith , 1, 1, sn8 , sn8 , sn8 , sn8 , zyxw, rgb -PIPE_FORMAT_X8B8G8R8_SNORM , arith , 1, 1, sn8 , sn8 , sn8 , sn8 , zyx1, rgb +PIPE_FORMAT_B6G5R5_SNORM , arith , 1, 1, sn5 , sn5 , sn6 , , xyz1, rgb +PIPE_FORMAT_A8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb +PIPE_FORMAT_X8B8G8R8_SNORM , array , 1, 1, sn8 , sn8 , sn8 , sn8 , wzy1, rgb PIPE_FORMAT_R8_SSCALED , array , 1, 1, s8 , , , , x001, rgb PIPE_FORMAT_R8G8_SSCALED , array , 1, 1, s8 , s8 , , , xy01, rgb PIPE_FORMAT_R8G8B8_SSCALED , array , 1, 1, s8 , s8 , s8 , , xyz1, rgb @@ -90,14 +90,14 @@ PIPE_FORMAT_R32G32B32_FIXED , array , 1, 1, h32 , h32 , h32 , , xyz1, PIPE_FORMAT_R32G32B32A32_FIXED , array , 1, 1, h32 , h32 , h32 , h32 , xyzw, rgb PIPE_FORMAT_L8_SRGB , arith , 1, 1, u8 , , , , xxx1, srgb PIPE_FORMAT_A8L8_SRGB , arith , 1, 1, u8 , u8 , , , xxxy, srgb -PIPE_FORMAT_R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , , xyz1, srgb -PIPE_FORMAT_R8G8B8A8_SRGB 
, arith , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb -PIPE_FORMAT_R8G8B8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb -PIPE_FORMAT_A8R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , wxyz, srgb -PIPE_FORMAT_X8R8G8B8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , 1xyz, srgb -PIPE_FORMAT_B8G8R8A8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyxw, srgb -PIPE_FORMAT_B8G8R8X8_SRGB , arith , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb -PIPE_FORMAT_X8UB8UG8SR8S_NORM , arith , 1, 1, sn8 , sn8 , un8 , x8 , 1zyx, rgb +PIPE_FORMAT_R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , , xyz1, srgb +PIPE_FORMAT_R8G8B8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyzw, srgb +PIPE_FORMAT_R8G8B8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , xyz1, srgb +PIPE_FORMAT_A8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzwx, srgb +PIPE_FORMAT_X8R8G8B8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , yzw1, srgb +PIPE_FORMAT_B8G8R8A8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyxw, srgb +PIPE_FORMAT_B8G8R8X8_SRGB , array , 1, 1, u8 , u8 , u8 , u8 , zyx1, srgb +PIPE_FORMAT_X8UB8UG8SR8S_NORM , array , 1, 1, sn8 , sn8 , un8 , x8 , wzy1, rgb PIPE_FORMAT_B6UG5SR5S_NORM , arith , 1, 1, sn5 , sn5 , un6 , , xyz1, rgb PIPE_FORMAT_DXT1_RGB , dxt , 4, 4, x64 , , , , xyz1, rgb PIPE_FORMAT_DXT1_RGBA , dxt , 4, 4, x64 , , , , xyzw, rgb diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c index 9eb8f309cd..87ee0e4768 100644 --- a/src/gallium/auxiliary/util/u_network.c +++ b/src/gallium/auxiliary/util/u_network.c @@ -117,7 +117,7 @@ u_socket_connect(const char *hostname, uint16_t port) if (!host) return -1; - memcpy((char *)&sa.sin_addr,host->h_addr,host->h_length); + memcpy((char *)&sa.sin_addr,host->h_addr_list[0],host->h_length); sa.sin_family= host->h_addrtype; sa.sin_port = htons(port); diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 298fbacecb..8479161c74 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -41,7 
+41,7 @@ /** * Copy 2D rect from one place to another. * Position and sizes are in pixels. - * src_pitch may be negative to do vertical flip of pixels from source. + * src_stride may be negative to do vertical flip of pixels from source. */ void util_copy_rect(ubyte * dst, @@ -54,7 +54,7 @@ util_copy_rect(ubyte * dst, const ubyte * src, int src_stride, unsigned src_x, - int src_y) + unsigned src_y) { unsigned i; int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; @@ -65,10 +65,6 @@ util_copy_rect(ubyte * dst, assert(blocksize > 0); assert(blockwidth > 0); assert(blockheight > 0); - assert(src_x >= 0); - assert(src_y >= 0); - assert(dst_x >= 0); - assert(dst_y >= 0); dst_x /= blockwidth; dst_y /= blockheight; @@ -113,8 +109,6 @@ util_fill_rect(ubyte * dst, assert(blocksize > 0); assert(blockwidth > 0); assert(blockheight > 0); - assert(dst_x >= 0); - assert(dst_y >= 0); dst_x /= blockwidth; dst_y /= blockheight; diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index 5e444ffae2..b44d821904 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -45,7 +45,7 @@ extern void util_copy_rect(ubyte * dst, enum pipe_format format, unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, - int src_stride, unsigned src_x, int src_y); + int src_stride, unsigned src_x, unsigned src_y); extern void util_fill_rect(ubyte * dst, enum pipe_format format, diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 8172ead020..b751e29ab6 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -44,13 +44,15 @@ /** * Make simple vertex pass-through shader. 
+ * \param num_attribs number of attributes to pass through + * \param semantic_names array of semantic names for each attribute + * \param semantic_indexes array of semantic indexes for each attribute */ void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, const uint *semantic_names, const uint *semantic_indexes) - { struct ureg_program *ureg; uint i; @@ -78,8 +80,6 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, } - - /** * Make simple fragment texture shader: * IMM {0,0,0,1} // (if writemask != 0xf) @@ -125,6 +125,12 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, return ureg_create_shader_and_destroy( ureg, pipe ); } + +/** + * Make a simple fragment shader that sets the output color to a color + * taken from a texture. + * \param tex_target one of PIPE_TEXTURE_x + */ void * util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) { @@ -133,6 +139,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target ) TGSI_WRITEMASK_XYZW ); } + /** * Make a simple fragment texture shader which reads an X component from * a texture and writes it as depth. @@ -177,6 +184,7 @@ util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe, return ureg_create_shader_and_destroy( ureg, pipe ); } + /** * Make simple fragment color pass-through shader. */ @@ -186,15 +194,19 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) return util_make_fragment_clonecolor_shader(pipe, 1); } + +/** + * Make a fragment shader that copies the input color to N output colors. 
+ */ void * util_make_fragment_clonecolor_shader(struct pipe_context *pipe, int num_cbufs) { struct ureg_program *ureg; struct ureg_src src; - struct ureg_dst dst[8]; + struct ureg_dst dst[PIPE_MAX_COLOR_BUFS]; int i; - assert(num_cbufs <= 8); + assert(num_cbufs <= PIPE_MAX_COLOR_BUFS); ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 5b8dd1abb9..1ba82bb21f 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -1155,27 +1155,6 @@ ycbcr_get_tile_rgba(const ushort *src, } -static void -fake_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = (i ^ j) & 1 ? 1.0f : 0.0f; - } - p += dst_stride; - } -} - - void pipe_tile_raw_to_rgba(enum pipe_format format, void *src, @@ -1258,8 +1237,10 @@ pipe_tile_raw_to_rgba(enum pipe_format format, ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format)); - fake_get_tile_rgba(src, w, h, dst, dst_stride); + util_format_read_4f(format, + dst, dst_stride * sizeof(float), + src, util_format_get_stride(format, w), + 0, 0, w, h); } } diff --git a/src/gallium/docs/Makefile b/src/gallium/docs/Makefile new file mode 100644 index 0000000000..d4a5be4192 --- /dev/null +++ b/src/gallium/docs/Makefile @@ -0,0 +1,89 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." 
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Gallium.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Gallium.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/src/gallium/docs/make.bat b/src/gallium/docs/make.bat new file mode 100644 index 0000000000..6f97e0730a --- /dev/null +++ b/src/gallium/docs/make.bat @@ -0,0 +1,113 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +set SPHINXBUILD=sphinx-build +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^<target^>` where ^<target^> is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. 
qthelp to make HTML files and a qthelp project + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Gallium.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Gallium.ghc + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
+ goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/src/gallium/docs/source/conf.py b/src/gallium/docs/source/conf.py new file mode 100644 index 0000000000..9b0c86babd --- /dev/null +++ b/src/gallium/docs/source/conf.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# +# Gallium documentation build configuration file, created by +# sphinx-quickstart on Sun Dec 20 14:09:05 2009. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.append(os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.pngmath'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. 
+source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Gallium' +copyright = u'2009, VMWare, X.org, Nouveau' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.3' +# The full version, including alpha/beta/rc tags. +release = '0.3' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# The language for highlighting source code. +highlight_language = 'c' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. 
Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. 
+#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Galliumdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'Gallium.tex', u'Gallium Documentation', + u'VMWare, X.org, Nouveau', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst new file mode 100644 index 0000000000..21f5f9111a --- /dev/null +++ b/src/gallium/docs/source/context.rst @@ -0,0 +1,128 @@ +Context +======= + +The context object represents the purest, most directly accessible, abilities +of the device's 3D rendering pipeline. 
+ +Methods +------- + +CSO State +^^^^^^^^^ + +All CSO state is created, bound, and destroyed, with triplets of methods that +all follow a specific naming scheme. For example, ``create_blend_state``, +``bind_blend_state``, and ``destroy_blend_state``. + +CSO objects handled by the context object: + +* :ref:`Blend`: ``*_blend_state`` +* :ref:`Sampler`: These are special; they can be bound to either vertex or + fragment samplers, and they are bound in groups. + ``bind_fragment_sampler_states``, ``bind_vertex_sampler_states`` +* :ref:`Rasterizer`: ``*_rasterizer_state`` +* :ref:`Depth, Stencil, & Alpha`: ``*_depth_stencil_alpha_state`` +* :ref:`Shader`: These have two sets of methods. ``*_fs_state`` is for + fragment shaders, and ``*_vs_state`` is for vertex shaders. + + +Resource Binding State +^^^^^^^^^^^^^^^^^^^^^^ + +This state describes how resources in various flavours (textures, +buffers, surfaces) are bound to the driver. + + +* ``set_constant_buffer`` +* ``set_framebuffer_state`` +* ``set_fragment_sampler_textures`` +* ``set_vertex_sampler_textures`` +* ``set_vertex_buffers`` + + +Non-CSO State +^^^^^^^^^^^^^ + +These pieces of state are too small, variable, and/or trivial to have CSO +objects. They all follow simple, one-method binding calls, e.g. +``set_edgeflags``. + +* ``set_edgeflags`` +* ``set_blend_color`` +* ``set_clip_state`` +* ``set_polygon_stipple`` +* ``set_scissor_state`` +* ``set_viewport_state`` +* ``set_vertex_elements`` + + +Clearing +^^^^^^^^ + +``clear`` initializes some or all of the surfaces currently bound to +the framebuffer to particular RGBA, depth, or stencil values. + +Clear is one of the most difficult concepts to nail down to a single +interface and it seems likely that we will want to add additional +clear paths, for instance clearing surfaces not bound to the +framebuffer, or read-modify-write clears such as depth-only or +stencil-only clears of packed depth-stencil buffers. 
+ + +Drawing +^^^^^^^ + +``draw_arrays`` + +``draw_elements`` + +``draw_range_elements`` + + +Queries +^^^^^^^ + +Queries gather some statistic from the 3D pipeline over one or more +draws. Queries may be nested, though no state tracker currently +exercises this. + +Queries can be created with ``create_query`` and deleted with +``destroy_query``. To enable a query, use ``begin_query``, and when finished, +use ``end_query`` to stop the query. Finally, ``get_query_result`` is used +to retrieve the results. + +Flushing +^^^^^^^^ + +``flush`` + + +Resource Busy Queries +^^^^^^^^^^^^^^^^^^^^^ + +``is_texture_referenced`` + +``is_buffer_referenced`` + + + +Blitting +^^^^^^^^ + +These methods emulate classic blitter controls. They are not guaranteed to be +available; if they are set to NULL, then they are not present. + +These methods operate directly on ``pipe_surface`` objects, and stand +apart from any 3D state in the context. Blitting functionality may be +moved to a separate abstraction at some point in the future. + +``surface_fill`` performs a fill operation on a section of a surface. + +``surface_copy`` blits a region of a surface to a region of another surface, +provided that both surfaces are the same format. The source and destination +may be the same surface, and overlapping blits are permitted. + +The interfaces to these calls are likely to change to make it easier +for a driver to batch multiple blits with the same source and +destination. + diff --git a/src/gallium/docs/source/cso.rst b/src/gallium/docs/source/cso.rst new file mode 100644 index 0000000000..dab1ee50f3 --- /dev/null +++ b/src/gallium/docs/source/cso.rst @@ -0,0 +1,14 @@ +CSO +=== + +CSO, Constant State Objects, are a core part of Gallium's API. + +CSO work on the principle of reusable state; they are created by filling +out a state object with the desired properties, then passing that object +to a context. 
The context returns an opaque context-specific handle which +can be bound at any time for the desired effect. + +.. toctree:: + :glob: + + cso/* diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst new file mode 100644 index 0000000000..fd9e4a1e2d --- /dev/null +++ b/src/gallium/docs/source/cso/blend.rst @@ -0,0 +1,14 @@ +.. _blend: + +Blend +===== + +This state controls blending of the final fragments into the target rendering +buffers. + +XXX it is unresolved what behavior should result if blend_enable is off. + +Members +------- + +XXX undocumented members diff --git a/src/gallium/docs/source/cso/dsa.rst b/src/gallium/docs/source/cso/dsa.rst new file mode 100644 index 0000000000..12abaa9d6f --- /dev/null +++ b/src/gallium/docs/source/cso/dsa.rst @@ -0,0 +1,58 @@ +.. _depth, stencil, & alpha: + +Depth, Stencil, & Alpha +======================= + +These three states control the depth, stencil, and alpha tests, used to +discard fragments that have passed through the fragment shader. + +Traditionally, these three tests have been clumped together in hardware, so +they are all stored in one structure. + +During actual execution, the order of operations done on fragments is always: + +* Stencil +* Depth +* Alpha + +Depth Members +------------- + +enabled + Whether the depth test is enabled. +writemask + Whether the depth buffer receives depth writes. +func + The depth test function. One of PIPE_FUNC. + +Stencil Members +--------------- + +XXX document valuemask, writemask + +enabled + Whether the stencil test is enabled. For the second stencil, whether the + two-sided stencil is enabled. +func + The stencil test function. One of PIPE_FUNC. +ref_value + Stencil test reference value; used for certain functions. +fail_op + The operation to carry out if the stencil test fails. One of + PIPE_STENCIL_OP. +zfail_op + The operation to carry out if the stencil test passes but the depth test + fails. One of PIPE_STENCIL_OP. 
+zpass_op + The operation to carry out if the stencil test and depth test both pass. + One of PIPE_STENCIL_OP. + +Alpha Members +------------- + +enabled + Whether the alpha test is enabled. +func + The alpha test function. One of PIPE_FUNC. +ref_value + Alpha test reference value; used for certain functions. diff --git a/src/gallium/docs/source/cso/rasterizer.rst b/src/gallium/docs/source/cso/rasterizer.rst new file mode 100644 index 0000000000..4d8e1708e7 --- /dev/null +++ b/src/gallium/docs/source/cso/rasterizer.rst @@ -0,0 +1,152 @@ +.. _rasterizer: + +Rasterizer +========== + +The rasterizer state controls the rendering of points, lines and triangles. +Attributes include polygon culling state, line width, line stipple, +multisample state, scissoring and flat/smooth shading. + + +Members +------- + +flatshade + If set, the provoking vertex of each polygon is used to determine the + color of the entire polygon. If not set, fragment colors will be + interpolated between the vertex colors. + Note that this is separate from the fragment shader input attributes + CONSTANT, LINEAR and PERSPECTIVE. We need the flatshade state at + clipping time to determine how to set the color of new vertices. + Also note that the draw module can implement flat shading by copying + the provoking vertex color to all the other vertices in the primitive. + +flatshade_first + Whether the first vertex should be the provoking vertex, for most + primitives. If not set, the last vertex is the provoking vertex. + +light_twoside + If set, there are per-vertex back-facing colors. The draw module + uses this state along with the front/back information to set the + final vertex colors prior to rasterization. 
+ +front_winding + Indicates the winding order of front-facing polygons, either + PIPE_WINDING_CW or PIPE_WINDING_CCW +cull_mode + Indicates which polygons to cull, either PIPE_WINDING_NONE (cull no + polygons), PIPE_WINDING_CW (cull clockwise-winding polygons), + PIPE_WINDING_CCW (cull counter clockwise-winding polygons), or + PIPE_WINDING_BOTH (cull all polygons). + +fill_cw + Indicates how to fill clockwise polygons, either PIPE_POLYGON_MODE_FILL, + PIPE_POLYGON_MODE_LINE or PIPE_POLYGON_MODE_POINT. +fill_ccw + Indicates how to fill counter clockwise polygons, either + PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE or PIPE_POLYGON_MODE_POINT. + +poly_stipple_enable + Whether polygon stippling is enabled. +poly_smooth + Controls OpenGL-style polygon smoothing/antialiasing +offset_cw + If set, clockwise polygons will have polygon offset factors applied +offset_ccw + If set, counter clockwise polygons will have polygon offset factors applied +offset_units + Specifies the polygon offset bias +offset_scale + Specifies the polygon offset scale + +line_width + The width of lines. +line_smooth + Whether lines should be smoothed. Line smoothing is simply anti-aliasing. +line_stipple_enable + Whether line stippling is enabled. +line_stipple_pattern + 16-bit bitfield of on/off flags, used to pattern the line stipple. +line_stipple_factor + When drawing a stippled line, each bit in the stipple pattern is + repeated N times, where N = line_stipple_factor + 1. +line_last_pixel + Controls whether the last pixel in a line is drawn or not. OpenGL + omits the last pixel to avoid double-drawing pixels at the ends of lines + when drawing connected lines. + +point_smooth + Whether points should be smoothed. Point smoothing turns rectangular + points into circles or ovals. +point_size_per_vertex + Whether vertices have a point size element. +point_size + The size of points, if not specified per-vertex. +point_size_min + The minimum size of points. 
+point_size_max + The maximum size of points. +point_sprite + Whether points are drawn as sprites (textured quads) +sprite_coord_mode + Specifies how the value for each shader output should be computed when + drawing sprites. If PIPE_SPRITE_COORD_NONE, don't change the vertex + shader output. Otherwise, the four vertices of the resulting quad will + be assigned texture coordinates. For PIPE_SPRITE_COORD_LOWER_LEFT, the + lower left vertex will have coordinate (0,0,0,1). + For PIPE_SPRITE_COORD_UPPER_LEFT, the upper-left vertex will have + coordinate (0,0,0,1). + This state is needed by the 'draw' module because that's where each + point vertex is converted into four quad vertices. There's no other + place to emit the new vertex texture coordinates which are required for + sprite rendering. + Note that when geometry shaders are available, this state could be + removed. A special geometry shader defined by the state tracker could + convert the incoming points into quads with the proper texture coords. + +scissor + Whether the scissor test is enabled. + +multisample + Whether :term:`MSAA` is enabled. + +bypass_vs_clip_and_viewport + Whether the entire TCL pipeline should be bypassed. This implies that + vertices are pre-transformed for the viewport, and will not be run + through the vertex shader. Note that implementations may still clip away + vertices that are not in the viewport. + +gl_rasterization_rules + Whether the rasterizer should use (0.5, 0.5) pixel centers. When not set, + the rasterizer will use (0, 0) for pixel centers. + + +Notes +----- + +flatshade +^^^^^^^^^ + +The actual interpolated shading algorithm is obviously +implementation-dependent, but will usually be Gouraud for most hardware. + +bypass_vs_clip_and_viewport +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When set, this implies that vertices are pre-transformed for the viewport, and +will not be run through the vertex shader. Note that implementations may still +clip away vertices that are not visible. 
+ +flatshade_first +^^^^^^^^^^^^^^^ + +There are several important exceptions to the specification of this rule. + +* ``PIPE_PRIMITIVE_POLYGON``: The provoking vertex is always the first + vertex. If the caller wishes to change the provoking vertex, they merely + need to rotate the vertices themselves. +* ``PIPE_PRIMITIVE_QUAD``, ``PIPE_PRIMITIVE_QUAD_STRIP``: This option has no + effect; the provoking vertex is always the last vertex. +* ``PIPE_PRIMITIVE_TRIANGLE_FAN``: When set, the provoking vertex is the + second vertex, not the first. This permits each segment of the fan to have + a different color. diff --git a/src/gallium/docs/source/cso/sampler.rst b/src/gallium/docs/source/cso/sampler.rst new file mode 100644 index 0000000000..e3f1757f57 --- /dev/null +++ b/src/gallium/docs/source/cso/sampler.rst @@ -0,0 +1,46 @@ +.. _sampler: + +Sampler +======= + +Texture units have many options for selecting texels from loaded textures; +this state controls an individual texture unit's texel-sampling settings. + +Texture coordinates are always treated as four-dimensional, and referred to +with the traditional (S, T, R, Q) notation. + +Members +------- + +XXX undocumented compare_mode, compare_func + +wrap_s + How to wrap the S coordinate. One of PIPE_TEX_WRAP. +wrap_t + How to wrap the T coordinate. One of PIPE_TEX_WRAP. +wrap_r + How to wrap the R coordinate. One of PIPE_TEX_WRAP. +min_img_filter + The filter to use when minifying texels. One of PIPE_TEX_FILTER. +min_mip_filter + The filter to use when minifying mipmapped textures. One of + PIPE_TEX_FILTER. +mag_img_filter + The filter to use when magnifying texels. One of PIPE_TEX_FILTER. +normalized_coords + Whether the texture coordinates are normalized. If normalized, they will + always be in [0, 1]. If not, they will be in the range of each dimension + of the loaded texture. +prefilter + XXX From the Doxy, "weird sampling state exposed by some APIs." Refine. +lod_bias + The bias to apply to the level of detail. 
+min_lod + Minimum level of detail, used to clamp LoD after bias. +max_lod + Maximum level of detail, used to clamp LoD after bias. +border_color + RGBA color used for out-of-bounds coordinates. +max_anisotropy + Maximum filtering to apply anisotropically to textures. Setting this to + 1.0 effectively disables anisotropic filtering. diff --git a/src/gallium/docs/source/cso/shader.rst b/src/gallium/docs/source/cso/shader.rst new file mode 100644 index 0000000000..0ee42c8787 --- /dev/null +++ b/src/gallium/docs/source/cso/shader.rst @@ -0,0 +1,12 @@ +.. _shader: + +Shader +====== + +One of the two types of shaders supported by Gallium. + +Members +------- + +tokens + A list of tgsi_tokens. diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst new file mode 100644 index 0000000000..33e846e33d --- /dev/null +++ b/src/gallium/docs/source/distro.rst @@ -0,0 +1,141 @@ +Distribution +============ + +Along with the interface definitions, the following drivers, state trackers, +and auxiliary modules are shipped in the standard Gallium distribution. + +Drivers +------- + +Cell +^^^^ + +Failover +^^^^^^^^ + +Deprecated. + +Intel i915 +^^^^^^^^^^ + +Intel i965 +^^^^^^^^^^ + +Highly experimental. + +Identity +^^^^^^^^ + +Wrapper driver. + +LLVM Softpipe +^^^^^^^^^^^^^ + +nVidia nv04 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv10 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv20 +^^^^^^^^^^^ + +Deprecated. + +nVidia nv30 +^^^^^^^^^^^ + +nVidia nv40 +^^^^^^^^^^^ + +nVidia nv50 +^^^^^^^^^^^ + +VMWare SVGA +^^^^^^^^^^^ + +ATI r300 +^^^^^^^^ + +AMD/ATI r600 +^^^^^^^^^^^^ + +Highly experimental. + +Softpipe +^^^^^^^^ + +Reference software rasterizer. + +Trace +^^^^^ + +Wrapper driver. 
+ +State Trackers +-------------- + +Direct Rendering Infrastructure +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +EGL +^^^ + +GLX +^^^ + +MesaGL +^^^^^^ + +Python +^^^^^^ + +OpenVG +^^^^^^ + +WGL +^^^ + +Xorg XFree86 DDX +^^^^^^^^^^^^^^^^ + +Auxiliary +--------- + +CSO Cache +^^^^^^^^^ + +Draw +^^^^ + +Gallivm +^^^^^^^ + +Indices +^^^^^^^ + +Pipe Buffer Manager +^^^^^^^^^^^^^^^^^^^ + +Remote Debugger +^^^^^^^^^^^^^^^ + +Runtime Assembly Emission +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Surface Context Tracker +^^^^^^^^^^^^^^^^^^^^^^^ + +TGSI +^^^^ + +Translate +^^^^^^^^^ + +Util +^^^^ + diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst new file mode 100644 index 0000000000..6a9110ce78 --- /dev/null +++ b/src/gallium/docs/source/glossary.rst @@ -0,0 +1,10 @@ +Glossary +======== + +.. glossary:: + :sorted: + + MSAA + Multi-Sampled Anti-Aliasing. A basic anti-aliasing technique that takes + multiple samples of the depth buffer, and uses this information to + smooth the edges of polygons. diff --git a/src/gallium/docs/source/index.rst b/src/gallium/docs/source/index.rst new file mode 100644 index 0000000000..54bc883fce --- /dev/null +++ b/src/gallium/docs/source/index.rst @@ -0,0 +1,28 @@ +.. Gallium documentation master file, created by + sphinx-quickstart on Sun Dec 20 14:09:05 2009. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Gallium's documentation! +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + intro + tgsi + screen + context + cso + distro + glossary + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/src/gallium/docs/source/intro.rst b/src/gallium/docs/source/intro.rst new file mode 100644 index 0000000000..1ea103840a --- /dev/null +++ b/src/gallium/docs/source/intro.rst @@ -0,0 +1,9 @@ +Introduction +============ + +What is Gallium? 
+---------------- + +Gallium is essentially an API for writing graphics drivers in a largely +device-agnostic fashion. It provides several objects which encapsulate the +core services of graphics hardware in a straightforward manner. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst new file mode 100644 index 0000000000..9631e6967e --- /dev/null +++ b/src/gallium/docs/source/screen.rst @@ -0,0 +1,39 @@ +Screen +====== + +A screen is an object representing the context-independent part of a device. + +Methods +------- + +XXX moar; got bored + +get_name +^^^^^^^^ + +Returns an identifying name for the screen. + +get_vendor +^^^^^^^^^^ + +Returns the screen vendor. + +get_param +^^^^^^^^^ + +Get an integer/boolean screen parameter. + +get_paramf +^^^^^^^^^^ + +Get a floating-point screen parameter. + +is_format_supported +^^^^^^^^^^^^^^^^^^^ + +See if a format can be used in a specific manner. + +texture_create +^^^^^^^^^^^^^^ + +Given a template of texture setup, create a BO-backed texture. diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst new file mode 100644 index 0000000000..ebee4902b0 --- /dev/null +++ b/src/gallium/docs/source/tgsi.rst @@ -0,0 +1,1270 @@ +TGSI +==== + +TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language +for describing shaders. Since Gallium is inherently shaderful, shaders are +an important part of the API. TGSI is the only intermediate representation +used by all drivers. + +Instruction Set +--------------- + +From GL_NV_vertex_program +^^^^^^^^^^^^^^^^^^^^^^^^^ + + +ARL - Address Register Load + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +MOV - Move + +.. math:: + + dst.x = src.x + + dst.y = src.y + + dst.z = src.z + + dst.w = src.w + + +LIT - Light Coefficients + +.. math:: + + dst.x = 1 + + dst.y = max(src.x, 0) + + dst.z = (src.x > 0) ? 
max(src.y, 0)^{clamp(src.w, -128, 128)} : 0 + + dst.w = 1 + + +RCP - Reciprocal + +.. math:: + + dst.x = \frac{1}{src.x} + + dst.y = \frac{1}{src.x} + + dst.z = \frac{1}{src.x} + + dst.w = \frac{1}{src.x} + + +RSQ - Reciprocal Square Root + +.. math:: + + dst.x = \frac{1}{\sqrt{|src.x|}} + + dst.y = \frac{1}{\sqrt{|src.x|}} + + dst.z = \frac{1}{\sqrt{|src.x|}} + + dst.w = \frac{1}{\sqrt{|src.x|}} + + +EXP - Approximate Exponential Base 2 + +.. math:: + + dst.x = 2^{\lfloor src.x\rfloor} + + dst.y = src.x - \lfloor src.x\rfloor + + dst.z = 2^{src.x} + + dst.w = 1 + + +LOG - Approximate Logarithm Base 2 + +.. math:: + + dst.x = \lfloor\log_2{|src.x|}\rfloor + + dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} + + dst.z = \log_2{|src.x|} + + dst.w = 1 + + +MUL - Multiply + +.. math:: + + dst.x = src0.x \times src1.x + + dst.y = src0.y \times src1.y + + dst.z = src0.z \times src1.z + + dst.w = src0.w \times src1.w + + +ADD - Add + +.. math:: + + dst.x = src0.x + src1.x + + dst.y = src0.y + src1.y + + dst.z = src0.z + src1.z + + dst.w = src0.w + src1.w + + +DP3 - 3-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + + +DP4 - 4-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w + + +DST - Distance Vector + +.. 
math:: + + dst.x = 1 + + dst.y = src0.y \times src1.y + + dst.z = src0.z + + dst.w = src1.w + + +MIN - Minimum + +.. math:: + + dst.x = min(src0.x, src1.x) + + dst.y = min(src0.y, src1.y) + + dst.z = min(src0.z, src1.z) + + dst.w = min(src0.w, src1.w) + + +MAX - Maximum + +.. math:: + + dst.x = max(src0.x, src1.x) + + dst.y = max(src0.y, src1.y) + + dst.z = max(src0.z, src1.z) + + dst.w = max(src0.w, src1.w) + + +SLT - Set On Less Than + +.. math:: + + dst.x = (src0.x < src1.x) ? 1 : 0 + + dst.y = (src0.y < src1.y) ? 1 : 0 + + dst.z = (src0.z < src1.z) ? 1 : 0 + + dst.w = (src0.w < src1.w) ? 1 : 0 + + +SGE - Set On Greater Equal Than + +.. math:: + + dst.x = (src0.x >= src1.x) ? 1 : 0 + + dst.y = (src0.y >= src1.y) ? 1 : 0 + + dst.z = (src0.z >= src1.z) ? 1 : 0 + + dst.w = (src0.w >= src1.w) ? 1 : 0 + + +MAD - Multiply And Add + +.. math:: + + dst.x = src0.x \times src1.x + src2.x + + dst.y = src0.y \times src1.y + src2.y + + dst.z = src0.z \times src1.z + src2.z + + dst.w = src0.w \times src1.w + src2.w + + +SUB - Subtract + +.. math:: + + dst.x = src0.x - src1.x + + dst.y = src0.y - src1.y + + dst.z = src0.z - src1.z + + dst.w = src0.w - src1.w + + +LRP - Linear Interpolate + +.. math:: + + dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x + + dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y + + dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z + + dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w + + +CND - Condition + +.. math:: + + dst.x = (src2.x > 0.5) ? src0.x : src1.x + + dst.y = (src2.y > 0.5) ? src0.y : src1.y + + dst.z = (src2.z > 0.5) ? src0.z : src1.z + + dst.w = (src2.w > 0.5) ? src0.w : src1.w + + +DP2A - 2-component Dot Product And Add + +.. 
math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x + + +FRAC - Fraction + +.. math:: + + dst.x = src.x - \lfloor src.x\rfloor + + dst.y = src.y - \lfloor src.y\rfloor + + dst.z = src.z - \lfloor src.z\rfloor + + dst.w = src.w - \lfloor src.w\rfloor + + +CLAMP - Clamp + +.. math:: + + dst.x = clamp(src0.x, src1.x, src2.x) + + dst.y = clamp(src0.y, src1.y, src2.y) + + dst.z = clamp(src0.z, src1.z, src2.z) + + dst.w = clamp(src0.w, src1.w, src2.w) + + +FLR - Floor + +This is identical to ARL. + +.. math:: + + dst.x = \lfloor src.x\rfloor + + dst.y = \lfloor src.y\rfloor + + dst.z = \lfloor src.z\rfloor + + dst.w = \lfloor src.w\rfloor + + +ROUND - Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +EX2 - Exponential Base 2 + +.. math:: + + dst.x = 2^{src.x} + + dst.y = 2^{src.x} + + dst.z = 2^{src.x} + + dst.w = 2^{src.x} + + +LG2 - Logarithm Base 2 + +.. math:: + + dst.x = \log_2{src.x} + + dst.y = \log_2{src.x} + + dst.z = \log_2{src.x} + + dst.w = \log_2{src.x} + + +POW - Power + +.. math:: + + dst.x = src0.x^{src1.x} + + dst.y = src0.x^{src1.x} + + dst.z = src0.x^{src1.x} + + dst.w = src0.x^{src1.x} + +XPD - Cross Product + +.. math:: + + dst.x = src0.y \times src1.z - src1.y \times src0.z + + dst.y = src0.z \times src1.x - src1.z \times src0.x + + dst.z = src0.x \times src1.y - src1.x \times src0.y + + dst.w = 1 + + +ABS - Absolute + +.. math:: + + dst.x = |src.x| + + dst.y = |src.y| + + dst.z = |src.z| + + dst.w = |src.w| + + +RCC - Reciprocal Clamped + +XXX cleanup on aisle three + +.. math:: + + dst.x = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.y = (1 / src.x) > 0 ? 
clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.z = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + dst.w = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020) + + +DPH - Homogeneous Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.y = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.z = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + dst.w = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w + + +COS - Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \cos{src.x} + + dst.z = \cos{src.x} + + dst.w = \cos{src.x} + + +DDX - Derivative Relative To X + +.. math:: + + dst.x = partialx(src.x) + + dst.y = partialx(src.y) + + dst.z = partialx(src.z) + + dst.w = partialx(src.w) + + +DDY - Derivative Relative To Y + +.. math:: + + dst.x = partialy(src.x) + + dst.y = partialy(src.y) + + dst.z = partialy(src.z) + + dst.w = partialy(src.w) + + +KILP - Predicated Discard + + discard + + +PK2H - Pack Two 16-bit Floats + + TBD + + +PK2US - Pack Two Unsigned 16-bit Scalars + + TBD + + +PK4B - Pack Four Signed 8-bit Scalars + + TBD + + +PK4UB - Pack Four Unsigned 8-bit Scalars + + TBD + + +RFL - Reflection Vector + +.. 
math:: + + dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x + + dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y + + dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z + + dst.w = 1 + +Considered for removal. + + +SEQ - Set On Equal + +.. math:: + + dst.x = (src0.x == src1.x) ? 1 : 0 + dst.y = (src0.y == src1.y) ? 1 : 0 + dst.z = (src0.z == src1.z) ? 1 : 0 + dst.w = (src0.w == src1.w) ? 1 : 0 + + +SFL - Set On False + +.. math:: + + dst.x = 0 + dst.y = 0 + dst.z = 0 + dst.w = 0 + +Considered for removal. + +SGT - Set On Greater Than + +.. math:: + + dst.x = (src0.x > src1.x) ? 1 : 0 + dst.y = (src0.y > src1.y) ? 1 : 0 + dst.z = (src0.z > src1.z) ? 1 : 0 + dst.w = (src0.w > src1.w) ? 1 : 0 + + +SIN - Sine + +.. math:: + + dst.x = \sin{src.x} + + dst.y = \sin{src.x} + + dst.z = \sin{src.x} + + dst.w = \sin{src.x} + + +SLE - Set On Less Equal Than + +.. math:: + + dst.x = (src0.x <= src1.x) ? 1 : 0 + dst.y = (src0.y <= src1.y) ? 1 : 0 + dst.z = (src0.z <= src1.z) ? 1 : 0 + dst.w = (src0.w <= src1.w) ? 1 : 0 + + +SNE - Set On Not Equal + +.. math:: + + dst.x = (src0.x != src1.x) ? 1 : 0 + dst.y = (src0.y != src1.y) ? 1 : 0 + dst.z = (src0.z != src1.z) ? 1 : 0 + dst.w = (src0.w != src1.w) ? 1 : 0 + + +STR - Set On True + +.. math:: + + dst.x = 1 + dst.y = 1 + dst.z = 1 + dst.w = 1 + + +TEX - Texture Lookup + + TBD + + +TXD - Texture Lookup with Derivatives + + TBD + + +TXP - Projective Texture Lookup + + TBD + + +UP2H - Unpack Two 16-Bit Floats + + TBD + + Considered for removal. + +UP2US - Unpack Two Unsigned 16-Bit Scalars + + TBD + + Considered for removal. 
+ +UP4B - Unpack Four Signed 8-Bit Values + + TBD + + Considered for removal. + +UP4UB - Unpack Four Unsigned 8-Bit Scalars + + TBD + + Considered for removal. + +X2D - 2D Coordinate Transformation + +.. math:: + + dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w + dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y + dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w + +Considered for removal. + + +From GL_NV_vertex_program2 +^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +ARA - Address Register Add + + TBD + + Considered for removal. + +ARR - Address Register Load With Round + +.. math:: + + dst.x = round(src.x) + + dst.y = round(src.y) + + dst.z = round(src.z) + + dst.w = round(src.w) + + +BRA - Branch + + pc = target + + Considered for removal. + +CAL - Subroutine Call + + push(pc) + pc = target + + +RET - Subroutine Call Return + + pc = pop() + + Potential restrictions: + * Only occurs at end of function. + +SSG - Set Sign + +.. math:: + + dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 + + dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 + + dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 + + dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 + + +CMP - Compare + +.. math:: + + dst.x = (src0.x < 0) ? src1.x : src2.x + + dst.y = (src0.y < 0) ? src1.y : src2.y + + dst.z = (src0.z < 0) ? src1.z : src2.z + + dst.w = (src0.w < 0) ? src1.w : src2.w + + +KIL - Conditional Discard + +.. math:: + + if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0) + discard + endif + + +SCS - Sine Cosine + +.. math:: + + dst.x = \cos{src.x} + + dst.y = \sin{src.x} + + dst.z = 0 + + dst.w = 1 + + +TXB - Texture Lookup With Bias + + TBD + + +NRM - 3-component Vector Normalise + +.. 
math:: + + dst.x = src.x / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z} + + dst.y = src.y / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z} + + dst.z = src.z / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z} + + dst.w = 1 + + +DIV - Divide + +.. math:: + + dst.x = \frac{src0.x}{src1.x} + + dst.y = \frac{src0.y}{src1.y} + + dst.z = \frac{src0.z}{src1.z} + + dst.w = \frac{src0.w}{src1.w} + + +DP2 - 2-component Dot Product + +.. math:: + + dst.x = src0.x \times src1.x + src0.y \times src1.y + + dst.y = src0.x \times src1.x + src0.y \times src1.y + + dst.z = src0.x \times src1.x + src0.y \times src1.y + + dst.w = src0.x \times src1.x + src0.y \times src1.y + + +TXL - Texture Lookup With LOD + + TBD + + +BRK - Break + + TBD + + +IF - If + + TBD + + +BGNFOR - Begin a For-Loop + + dst.x = floor(src.x) + dst.y = floor(src.y) + dst.z = floor(src.z) + + if (dst.y <= 0) + pc = [matching ENDFOR] + 1 + endif + + Note: The destination must be a loop register. + The source must be a constant register. + + Considered for cleanup / removal. + + +REP - Repeat + + TBD + + +ELSE - Else + + TBD + + +ENDIF - End If + + TBD + + +ENDFOR - End a For-Loop + + dst.x = dst.x + dst.z + dst.y = dst.y - 1.0 + + if (dst.y > 0) + pc = [matching BGNFOR instruction] + 1 + endif + + Note: The destination must be a loop register. + + Considered for cleanup / removal. + +ENDREP - End Repeat + + TBD + + +PUSHA - Push Address Register On Stack + + push(src.x) + push(src.y) + push(src.z) + push(src.w) + + Considered for cleanup / removal. + +POPA - Pop Address Register From Stack + + dst.w = pop() + dst.z = pop() + dst.y = pop() + dst.x = pop() + + Considered for cleanup / removal. + + +From GL_NV_gpu_program4 +^^^^^^^^^^^^^^^^^^^^^^^^ + +Support for these opcodes is indicated by a special pipe capability bit (TBD). + +CEIL - Ceiling + +.. 
math:: + + dst.x = \lceil src.x\rceil + + dst.y = \lceil src.y\rceil + + dst.z = \lceil src.z\rceil + + dst.w = \lceil src.w\rceil + + +I2F - Integer To Float + +.. math:: + + dst.x = (float) src.x + + dst.y = (float) src.y + + dst.z = (float) src.z + + dst.w = (float) src.w + + +NOT - Bitwise Not + +.. math:: + + dst.x = ~src.x + + dst.y = ~src.y + + dst.z = ~src.z + + dst.w = ~src.w + + +TRUNC - Truncate + +.. math:: + + dst.x = trunc(src.x) + + dst.y = trunc(src.y) + + dst.z = trunc(src.z) + + dst.w = trunc(src.w) + + +SHL - Shift Left + +.. math:: + + dst.x = src0.x << src1.x + + dst.y = src0.y << src1.x + + dst.z = src0.z << src1.x + + dst.w = src0.w << src1.x + + +SHR - Shift Right + +.. math:: + + dst.x = src0.x >> src1.x + + dst.y = src0.y >> src1.x + + dst.z = src0.z >> src1.x + + dst.w = src0.w >> src1.x + + +AND - Bitwise And + +.. math:: + + dst.x = src0.x & src1.x + + dst.y = src0.y & src1.y + + dst.z = src0.z & src1.z + + dst.w = src0.w & src1.w + + +OR - Bitwise Or + +.. math:: + + dst.x = src0.x | src1.x + + dst.y = src0.y | src1.y + + dst.z = src0.z | src1.z + + dst.w = src0.w | src1.w + + +MOD - Modulus + +.. math:: + + dst.x = src0.x \bmod src1.x + + dst.y = src0.y \bmod src1.y + + dst.z = src0.z \bmod src1.z + + dst.w = src0.w \bmod src1.w + + +XOR - Bitwise Xor + +.. math:: + + dst.x = src0.x ^ src1.x + + dst.y = src0.y ^ src1.y + + dst.z = src0.z ^ src1.z + + dst.w = src0.w ^ src1.w + + +SAD - Sum Of Absolute Differences + +.. 
math:: + + dst.x = |src0.x - src1.x| + src2.x + + dst.y = |src0.y - src1.y| + src2.y + + dst.z = |src0.z - src1.z| + src2.z + + dst.w = |src0.w - src1.w| + src2.w + + +TXF - Texel Fetch + + TBD + + +TXQ - Texture Size Query + + TBD + + +CONT - Continue + + TBD + + +From GL_NV_geometry_program4 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +EMIT - Emit + + TBD + + +ENDPRIM - End Primitive + + TBD + + +From GLSL +^^^^^^^^^^ + + +BGNLOOP - Begin a Loop + + TBD + + +BGNSUB - Begin Subroutine + + TBD + + +ENDLOOP - End a Loop + + TBD + + +ENDSUB - End Subroutine + + TBD + + +NOP - No Operation + + Do nothing. + + +NRM4 - 4-component Vector Normalise + +.. math:: + + dst.x = \frac{src.x}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}} + + dst.y = \frac{src.y}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}} + + dst.z = \frac{src.z}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}} + + dst.w = \frac{src.w}{\sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}} + + +ps_2_x +^^^^^^^^^^^^ + + +CALLNZ - Subroutine Call If Not Zero + + TBD + + +IFC - If + + TBD + + +BREAKC - Break Conditional + + TBD + + +Explanation of symbols used +------------------------------ + + +Functions +^^^^^^^^^^^^^^ + + + :math:`|x|` Absolute value of `x`. + + :math:`\lceil x \rceil` Ceiling of `x`. + + clamp(x,y,z) Clamp x between y and z. + (x < y) ? y : (x > z) ? z : x + + :math:`\lfloor x\rfloor` Floor of `x`. + + :math:`\log_2{x}` Logarithm of `x`, base 2. + + max(x,y) Maximum of x and y. + (x > y) ? x : y + + min(x,y) Minimum of x and y. + (x < y) ? x : y + + partialx(x) Derivative of x relative to fragment's X. + + partialy(x) Derivative of x relative to fragment's Y. + + pop() Pop from stack. + + :math:`x^y` `x` to the power `y`. + + push(x) Push x on stack. + + round(x) Round x. + + trunc(x) Truncate x, i.e. drop the fraction bits. 
+ + +Keywords +^^^^^^^^^^^^^ + + + discard Discard fragment. + + dst First destination register. + + dst0 First destination register. + + pc Program counter. + + src First source register. + + src0 First source register. + + src1 Second source register. + + src2 Third source register. + + target Label of target instruction. + + +Other tokens +--------------- + + +Declaration Semantic +^^^^^^^^^^^^^^^^^^^^^^^^ + + + Follows Declaration token if Semantic bit is set. + + Since its purpose is to link a shader with other stages of the pipeline, + it is valid to follow only those Declaration tokens that declare a register + either in INPUT or OUTPUT file. + + SemanticName field contains the semantic name of the register being declared. + There is no default value. + + SemanticIndex is an optional subscript that can be used to distinguish + different register declarations with the same semantic name. The default value + is 0. + + The meanings of the individual semantic names are explained in the following + sections. + +TGSI_SEMANTIC_POSITION +"""""""""""""""""""""" + +Position, sometimes known as HPOS or WPOS for historical reasons, is the +location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` +are the Cartesian coordinates, and ``w`` is the homogeneous coordinate and is used +for the perspective divide, if enabled. + +As a vertex shader output, position should be scaled to the viewport. When +used in fragment shaders, position will --- + +XXX --- wait a minute. Should position be in [0,1] for x and y? + +XXX additionally, is there a way to configure the perspective divide? it's +accelerated on most chipsets AFAIK... + +Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can +be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. + +XXX usually? can we solidify that? + +TGSI_SEMANTIC_COLOR +""""""""""""""""""" + +Colors are used to, well, color the primitives. Colors are always in +``(r, g, b, a)`` format. 
+ +If alpha is not specified, it defaults to 1. + +TGSI_SEMANTIC_BCOLOR +"""""""""""""""""""" + +Back-facing colors are only used for back-facing polygons, and are only valid +in vertex shader outputs. After rasterization, all polygons are front-facing +and COLOR and BCOLOR end up occupying the same slots in the fragment, so +all BCOLORs effectively become regular COLORs in the fragment shader. + +TGSI_SEMANTIC_FOG +""""""""""""""""" + +The fog coordinate historically has been used to replace the depth coordinate +for generation of fog in dedicated fog blocks. Gallium, however, does not use +dedicated fog acceleration, placing it entirely in the fragment shader +instead. + +The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first +component matters when writing from the vertex shader; the driver will ensure +that the coordinate is in this format when used as a fragment shader input. + +TGSI_SEMANTIC_PSIZE +""""""""""""""""""" + +PSIZE, or point size, is used to specify point sizes per-vertex. It should +be in ``(p, n, x, f)`` format, where ``p`` is the point size, ``n`` is the minimum +size, ``x`` is the maximum size, and ``f`` is the fade threshold. + +XXX this is arb_vp. is this what we actually do? should double-check... + +When using this semantic, be sure to set the appropriate state in the +:ref:`rasterizer` first. + +TGSI_SEMANTIC_GENERIC +""""""""""""""""""""" + +Generic semantics are nearly always used for texture coordinate attributes, +in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds +of lookups, and ``q`` is the level-of-detail bias for biased sampling. + +These attributes are called "generic" because they may be used for anything +else, including parameters, texture generation information, or anything that +can be stored inside a four-component vector. 
+ +TGSI_SEMANTIC_NORMAL +"""""""""""""""""""" + +Vertex normal; could be used to implement per-pixel lighting for legacy APIs +that allow mixing fixed-function and programmable stages. + +TGSI_SEMANTIC_FACE +"""""""""""""""""" + +FACE is the facing bit, to store the facing information for the fragment +shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive +when the fragment is front-facing, and negative when the fragment is +back-facing. + +TGSI_SEMANTIC_EDGEFLAG +"""""""""""""""""""""" + +XXX no clue diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 01bea0f8cc..3fa8b975d3 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) * * XXX should the element buffer be specified/bound with a separate function? */ -static boolean +static void cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -145,29 +145,27 @@ cell_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ cell_unmap_constant_buffers(sp); - - return TRUE; } -static boolean +static void cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return cell_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } -static boolean +static void cell_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return cell_draw_elements(pipe, NULL, 0, mode, start, count); + cell_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 
5c0179d954..12b855a3db 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -405,8 +405,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; break; @@ -418,8 +416,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler) case PIPE_TEX_FILTER_LINEAR: spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; break; - case PIPE_TEX_FILTER_ANISO: - /* fall-through, for now */ case PIPE_TEX_FILTER_NEAREST: spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; break; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 5ed330aa6e..d86d8e09a5 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1681,7 +1681,7 @@ exec_instruction( } break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 37184eac7b..46e4338d98 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe ) } +void failover_fail_over( struct failover_context *failover ) +{ + failover->dirty = TRUE; + failover->mode = FO_SW; +} + -static boolean failover_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +static void failover_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, + unsigned start, + 
unsigned count) { struct failover_context *failover = failover_context( pipe ); @@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe, /* Try hardware: */ if (failover->mode == FO_HW) { - if (!failover->hw->draw_elements( failover->hw, - indexBuffer, - indexSize, - prim, - start, - count )) { - - failover->hw->flush( failover->hw, ~0, NULL ); - failover->mode = FO_SW; - } + failover->hw->draw_elements( failover->hw, + indexBuffer, + indexSize, + prim, + start, + count ); } /* Possibly try software: */ if (failover->mode == FO_SW) { - if (failover->dirty) + if (failover->dirty) { + failover->hw->flush( failover->hw, ~0, NULL ); failover_state_emit( failover ); + } failover->sw->draw_elements( failover->sw, indexBuffer, @@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe, */ failover->sw->flush( failover->sw, ~0, NULL ); } - - return TRUE; } -static boolean failover_draw_arrays( struct pipe_context *pipe, +static void failover_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return failover_draw_elements(pipe, NULL, 0, prim, start, count); + failover_draw_elements(pipe, NULL, 0, prim, start, count); } static unsigned int diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h index a8ce997a1f..533122b69d 100644 --- a/src/gallium/drivers/failover/fo_winsys.h +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -36,10 +36,13 @@ struct pipe_context; +struct failover_context; struct pipe_context *failover_create( struct pipe_context *hw, struct pipe_context *sw ); +void failover_fail_over( struct failover_context *failover ); + #endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 949f046350..89feeade75 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -45,7 +45,7 @@ */ -static boolean +static void 
i915_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } - - return TRUE; } -static boolean +static void i915_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) { - return i915_draw_range_elements(pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - prim, start, count); + i915_draw_range_elements(pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count); } -static boolean +static void i915_draw_arrays(struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return i915_draw_elements(pipe, NULL, 0, prim, start, count); + i915_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 1528afc859..5f5b6f8e18 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter ) return FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return FILTER_ANISOTROPIC; default: assert(0); return FILTER_NEAREST; @@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe, minFilt = translate_img_filter( sampler->min_img_filter ); magFilt = translate_img_filter( sampler->mag_img_filter ); + if (sampler->max_anisotropy > 1.0) + minFilt = magFilt = FILTER_ANISOTROPIC; + if (sampler->max_anisotropy > 2.0) { cso->state[0] |= SS2_MAX_ANISO_4; } diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 852fd22982..ea8d39adaf 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -176,7 +176,7 @@ 
try_draw_range_elements(struct brw_context *brw, } -static boolean +static void brw_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -228,29 +228,27 @@ brw_draw_range_elements(struct pipe_context *pipe, ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } - - return TRUE; } -static boolean +static void brw_draw_elements(struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned mode, unsigned start, unsigned count) { - return brw_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - mode, - start, count ); + brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); } -static boolean +static void brw_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return brw_draw_elements(pipe, NULL, 0, mode, start, count); + brw_draw_elements(pipe, NULL, 0, mode, start, count); } diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 4fe7b6acc1..00d8eaccbc 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p, jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 5ddc63f57e..81712798a5 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -48,8 +48,6 @@ static GLuint translate_img_filter( unsigned filter ) return BRW_MAPFILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: - return 
BRW_MAPFILTER_ANISOTROPIC; default: assert(0); return BRW_MAPFILTER_NEAREST; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 7e57d0306b..8f983a60ae 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p, { GLuint i; - assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X); + assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index bdbaae5987..9f5b4e6323 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -45,7 +45,7 @@ identity_destroy(struct pipe_context *_pipe) free(id_pipe); } -static boolean +static void identity_draw_arrays(struct pipe_context *_pipe, unsigned prim, unsigned start, @@ -54,13 +54,13 @@ identity_draw_arrays(struct pipe_context *_pipe, struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; - return pipe->draw_arrays(pipe, - prim, - start, - count); + pipe->draw_arrays(pipe, + prim, + start, + count); } -static boolean +static void identity_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -73,15 +73,15 @@ identity_draw_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer *indexBuffer = id_buffer->buffer; - return pipe->draw_elements(pipe, - indexBuffer, - indexSize, - prim, - start, - count); + pipe->draw_elements(pipe, + indexBuffer, + indexSize, + prim, + start, + count); } -static boolean +static void identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -96,14 +96,14 @@ identity_draw_range_elements(struct pipe_context *_pipe, struct pipe_context *pipe = id_pipe->pipe; struct pipe_buffer 
*indexBuffer = id_buffer->buffer; - return pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, - minIndex, - maxIndex, - mode, - start, - count); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, + minIndex, + maxIndex, + mode, + start, + count); } static struct pipe_query * diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index e038a5229e..7c6e46006b 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -50,7 +50,6 @@ C_SOURCES = \ lp_state_vs.c \ lp_surface.c \ lp_tex_cache.c \ - lp_tex_sample_c.c \ lp_tex_sample_llvm.c \ lp_texture.c \ lp_tile_cache.c \ diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 0c3f00fd58..72d9f39658 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -59,27 +59,16 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.5 or greater. LLVM 2.6 is preferred. + - LLVM 2.6. - On Debian based distributions do: + For Linux, on a recent Debian based distribution do: aptitude install llvm-dev - There is a typo in one of the llvm 2.5 headers, that may cause compilation - errors. To fix it apply the change: - - --- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100 - +++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100 - @@ -831,7 +831,7 @@ - template<typename T> - inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { - #if DEBUG - - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I) - + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) - cast<T>(*I); - #endif - return reinterpret_cast<T**>(Vals); - + For Windows download pre-built MSVC 9.0 or MinGW binaries from + http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment + variable to the extracted path. 
+ - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -95,9 +84,9 @@ Requirements Building ======== -To build everything invoke scons as: +To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false -k + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -105,12 +94,15 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as but the rest of these instructions assume that scons is used. +For Windows, everything is the same except the winsys: + + scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false Using ===== -Building will create a drop-in alternative for libGL.so. To use it set the -environment variables: +On Linux, building will create a drop-in alternative for libGL.so. To use it +set the environment variables: export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH @@ -121,6 +113,11 @@ or For performance evaluation pass debug=no to scons, and use the corresponding lib directory without the "-debug" suffix. +On Windows, building will create a drop-in alternative for opengl32.dll. To use +it put it in the same directory as the application. It can also be used by +replacing the native ICD driver, but it's quite an advanced usage, so if you +need to ask, don't even try it. 
+ Unit testing ============ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 3ca676647c..6bb545a501 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -66,7 +66,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_vs.c', 'lp_surface.c', 'lp_tex_cache.c', - 'lp_tex_sample_c.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', 'lp_tile_cache.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d14f468ba9..ced7b9c11d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle { LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff8..25c10af29f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -47,7 +47,7 @@ */ enum lp_build_flow_construct_kind { lP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_SKIP }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp index d3f78c06d9..6e79438ead 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp +++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp @@ -59,3 +59,17 @@ LLVMInitializeNativeTarget(void) #endif + + +/* + * Hack to allow the linking of release LLVM static libraries on a debug build. 
+ * + * See also: + * - http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/7234ea2b-0042-42ed-b4e2-5d8644dfb57d + */ +#if defined(_MSC_VER) && defined(_DEBUG) +#include <crtdefs.h> +extern "C" { + _CRTIMP void __cdecl _invalid_parameter_noinfo(void) {} +} +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c index af70ddc6ab..9003e108c1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c @@ -69,8 +69,8 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->min_img_filter = sampler->min_img_filter; state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; - if(sampler->compare_mode) { - state->compare_mode = sampler->compare_mode; + state->compare_mode = sampler->compare_mode; + if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { state->compare_func = sampler->compare_func; } state->normalized_coords = sampler->normalized_coords; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c index 47b68b71e2..5ee8d556a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c @@ -488,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef res; unsigned chan; - if(!bld->static_state->compare_mode) + if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) return; /* TODO: Compare before swizzling, to avoid redundant computations */ @@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel); break; case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: if(lp_format_is_rgba8(bld.format_desc)) lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel); else diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c 
b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 7cfa4cc59a..fb1eda4423 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -361,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, if (projected) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } + for (i = num_coords; i < 3; i++) { + coords[i] = bld->base.undef; + } bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, @@ -1315,7 +1318,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 37587d4f79..1cc3c9227c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -256,22 +256,6 @@ llvmpipe_create( struct pipe_screen *screen ) llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen); - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX; - llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i]; - llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i]; - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT; - llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i]; - llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i]; - } - /* * Create drawing context and plug our rendering stage into it. 
*/ @@ -279,10 +263,7 @@ llvmpipe_create( struct pipe_screen *screen ) if (!llvmpipe->draw) goto fail; - draw_texture_samplers(llvmpipe->draw, - PIPE_MAX_VERTEX_SAMPLERS, - (struct tgsi_sampler **) - llvmpipe->tgsi.vert_samplers_list); + /* FIXME: devise alternative to draw_texture_samplers */ if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index cc4d5ad5fd..6411797cf5 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -115,14 +115,6 @@ struct llvmpipe_context { unsigned line_stipple_counter; - /** TGSI exec things */ - struct { - struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; - struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; - } tgsi; - /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index a96c2cad9d..c152b4413f 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -45,11 +45,11 @@ -boolean +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); + llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. 
*/ -boolean +void llvmpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -122,20 +122,18 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, /* Note: leave drawing surfaces mapped */ lp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return llvmpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + llvmpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index bce3baec16..4ef0783f3e 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -79,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_context */ { - LLVMTypeRef elem_types[5]; + LLVMTypeRef elem_types[4]; LLVMTypeRef context_type; elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */ - elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - elem_types[2] = LLVMFloatType(); /* alpha_ref_value */ - elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ - elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ + elem_types[1] = LLVMFloatType(); /* alpha_ref_value */ + elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */ + elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */ context_type = LLVMStructType(elem_types, Elements(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, screen->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers, - screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, - screen->target, context_type, 2); + 
screen->target, context_type, 1); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color, - screen->target, context_type, 3); + screen->target, context_type, 2); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, screen->target, context_type, LP_JIT_CONTEXT_TEXTURES_INDEX); @@ -109,24 +106,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) screen->context_ptr_type = LLVMPointerType(context_type, 0); } - /* fetch_texel - */ - { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[3]; - LLVMValueRef fetch_texel; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */ - arg_types[1] = LLVMInt32Type(); /* unit */ - arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */ - - fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel", - ret_type, arg_types, Elements(arg_types)); - - LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa); - } - #ifdef DEBUG LLVMDumpModule(screen->module); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 58f716ede2..277b690c02 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -41,7 +41,6 @@ #include "pipe/p_state.h" -struct tgsi_sampler; struct llvmpipe_screen; @@ -78,8 +77,6 @@ struct lp_jit_context { const float *constants; - struct tgsi_sampler **samplers; - float alpha_ref_value; /* FIXME: store (also?) 
in floats */ @@ -92,16 +89,13 @@ struct lp_jit_context #define lp_jit_context_constants(_builder, _ptr) \ lp_build_struct_get(_builder, _ptr, 0, "constants") -#define lp_jit_context_samplers(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 1, "samplers") - #define lp_jit_context_alpha_ref_value(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value") + lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value") #define lp_jit_context_blend_color(_builder, _ptr) \ - lp_build_struct_get(_builder, _ptr, 3, "blend_color") + lp_build_struct_get(_builder, _ptr, 2, "blend_color") -#define LP_JIT_CONTEXT_TEXTURES_INDEX 4 +#define LP_JIT_CONTEXT_TEXTURES_INDEX 3 #define lp_jit_context_textures(_builder, _ptr) \ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures") @@ -118,12 +112,6 @@ typedef void void *color, void *depth); -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ); - - void lp_jit_screen_cleanup(struct llvmpipe_screen *screen); diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 5cee7bf74b..7020da145f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -56,7 +56,6 @@ #define LP_NEW_QUERY 0x4000 -struct tgsi_sampler; struct vertex_info; struct pipe_context; struct llvmpipe_context; @@ -197,14 +196,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp); void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ); -boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -boolean llvmpipe_draw_elements(struct pipe_context *pipe, +void llvmpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); -boolean +void llvmpipe_draw_range_elements(struct 
pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index acfd7be5f7..6c1ef6bc42 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -192,36 +192,6 @@ compute_cliprect(struct llvmpipe_context *lp) } -static void -update_tgsi_samplers( struct llvmpipe_context *llvmpipe ) -{ - unsigned i; - - /* vertex shader samplers */ - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i]; - llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i]; - llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] ); - } - - /* fragment shader samplers */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i]; - llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i]; - llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples; - } - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] ); - } - - llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list; -} - /* Hopefully this will remain quite simple, otherwise need to pull in * something like the state tracker mechanism. 
*/ @@ -237,8 +207,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) } if (llvmpipe->dirty & (LP_NEW_SAMPLER | - LP_NEW_TEXTURE)) - update_tgsi_samplers( llvmpipe ); + LP_NEW_TEXTURE)) { + /* TODO */ + } if (llvmpipe->dirty & (LP_NEW_RASTERIZER | LP_NEW_FS | diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f2b8c36264..b73ca2d41e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -453,8 +453,8 @@ generate_fragment(struct llvmpipe_context *lp, debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE)); debug_printf(" .mag_img_filter = %s\n", debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE)); - if(key->sampler[i].compare_mode) - debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); + if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE) + debug_printf(" .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE)); debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords); debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter); } @@ -550,13 +550,8 @@ generate_fragment(struct llvmpipe_context *lp, a0_ptr, dadx_ptr, dady_ptr, x0, y0, 2, 0); -#if 0 - /* C texture sampling */ - sampler = lp_c_sampler_soa_create(context_ptr); -#else /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); -#endif for(i = 0; i < num_fs; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 968c7a2d4a..faddfb9677 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -330,7 +330,7 @@ test_one(unsigned verbose, fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; - //abort(); + /* abort(); 
*/ } } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 9ad1bde956..cb59a94464 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -31,64 +31,11 @@ #include <llvm-c/Core.h> -#include "tgsi/tgsi_exec.h" - -struct llvmpipe_tex_tile_cache; struct lp_sampler_static_state; /** - * Subclass of tgsi_sampler - */ -struct lp_shader_sampler -{ - struct tgsi_sampler base; /**< base class */ - - unsigned processor; - - /* For lp_get_samples_2d_linear_POT: - */ - unsigned xpot; - unsigned ypot; - unsigned level; - - const struct pipe_texture *texture; - const struct pipe_sampler_state *sampler; - - struct llvmpipe_tex_tile_cache *cache; -}; - - - -static INLINE struct lp_shader_sampler * -lp_shader_sampler(const struct tgsi_sampler *sampler) -{ - return (struct lp_shader_sampler *) sampler; -} - - - -extern void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - - -/** - * Texture sampling code generator that just calls lp_get_samples C function - * for the actual sampling computation. - * - * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. - */ -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr); - - -/** * Pure-LLVM texture sampling code generator. * * @param context_ptr LLVM value with the pointer to the struct lp_jit_context. diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c deleted file mode 100644 index 68520fa4f0..0000000000 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ /dev/null @@ -1,1713 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * Copyright 2008 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Texture sampling - * - * Authors: - * Brian Paul - */ - -#include "lp_context.h" -#include "lp_quad.h" -#include "lp_surface.h" -#include "lp_texture.h" -#include "lp_tex_sample.h" -#include "lp_tex_cache.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "util/u_memory.h" - - - -/* - * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes - * see 1-pixel bands of improperly weighted linear-filtered textures. - * The tests/texwrap.c demo is a good test. - * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. - * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). 
- */ -#define FRAC(f) ((f) - util_ifloor(f)) - - -/** - * Linear interpolation macro - */ -static INLINE float -lerp(float a, float v0, float v1) -{ - return v0 + a * (v1 - v0); -} - - -/** - * Do 2D/biliner interpolation of float values. - * v00, v10, v01 and v11 are typically four texture samples in a square/box. - * a and b are the horizontal and vertical interpolants. - * It's important that this function is inlined when compiled with - * optimization! If we find that's not true on some systems, convert - * to a macro. - */ -static INLINE float -lerp_2d(float a, float b, - float v00, float v10, float v01, float v11) -{ - const float temp0 = lerp(a, v00, v10); - const float temp1 = lerp(a, v01, v11); - return lerp(b, temp0, temp1); -} - - -/** - * As above, but 3D interpolation of 8 values. - */ -static INLINE float -lerp_3d(float a, float b, float c, - float v000, float v100, float v010, float v110, - float v001, float v101, float v011, float v111) -{ - const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); - const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); - return lerp(c, temp0, temp1); -} - - - -/** - * If A is a signed integer, A % B doesn't give the right value for A < 0 - * (in terms of texture repeat). Just casting to unsigned fixes that. - */ -#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) - - -/** - * Apply texture coord wrapping mode and return integer texture indexes - * for a vector of four texcoords (S or T or P). 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the incoming texcoords - * \param size the texture image size - * \param icoord returns the integer texcoords - * \return integer texture index - */ -static INLINE void -nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - /* s limited to [0,1) */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch] * size); - icoord[ch] = REMAINDER(i, size); - } - return; - case PIPE_TEX_WRAP_CLAMP: - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= 0.0F) - icoord[ch] = 0; - else if (s[ch] >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] < min) - icoord[ch] = 0; - else if (s[ch] > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [-1, size] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - if (s[ch] <= min) - icoord[ch] = -1; - else if (s[ch] >= max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(s[ch] * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP: 
- for (ch = 0; ch < 4; ch++) { - /* s limited to [0,1] */ - /* i limited to [0,size-1] */ - const float u = fabsf(s[ch]); - if (u <= 0.0F) - icoord[ch] = 0; - else if (u >= 1.0F) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = 1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = 0; - else if (u > max) - icoord[ch] = size - 1; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - /* s limited to [min,max] */ - /* i limited to [0, size-1] */ - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - const float u = fabsf(s[ch]); - if (u < min) - icoord[ch] = -1; - else if (u > max) - icoord[ch] = size; - else - icoord[ch] = util_ifloor(u * size); - } - } - return; - default: - assert(0); - } -} - - -/** - * Used to compute texel locations for linear sampling for four texcoords. 
- * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoords - * \param size the texture image size - * \param icoord0 returns first texture indexes - * \param icoord1 returns second texture indexes (usually icoord0 + 1) - * \param w returns blend factor/weight between texture indexes - * \param icoord returns the computed integer texture coords - */ -static INLINE void -linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - - switch (wrapMode) { - case PIPE_TEX_WRAP_REPEAT: - for (ch = 0; ch < 4; ch++) { - float u = s[ch] * size - 0.5F; - icoord0[ch] = REMAINDER(util_ifloor(u), size); - icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.0F, 1.0F); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], min, max); - u = u * size - 0.5f; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - for (ch = 0; ch < 4; ch++) { - const int flr = util_ifloor(s[ch]); - float u; - if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; - u = u * size - 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; 
- w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord0[ch] < 0) - icoord0[ch] = 0; - if (icoord1[ch] >= (int) size) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break;; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - { - const float min = -1.0F / (2.0F * size); - const float max = 1.0F - min; - for (ch = 0; ch < 4; ch++) { - float u = fabsf(s[ch]); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - } - break;; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. - */ -static INLINE void -nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - int i = util_ifloor(s[ch]); - icoord[ch]= CLAMP(i, 0, (int) size-1); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); - } - return; - default: - assert(0); - } -} - - -/** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. 
- */ -static INLINE void -linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, - int icoord0[4], int icoord1[4], float w[4]) -{ - uint ch; - switch (wrapMode) { - case PIPE_TEX_WRAP_CLAMP: - for (ch = 0; ch < 4; ch++) { - /* Not exactly what the spec says, but it matches NVIDIA output */ - float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - w[ch] = FRAC(u); - } - return; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - /* fall-through */ - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); - u -= 0.5F; - icoord0[ch] = util_ifloor(u); - icoord1[ch] = icoord0[ch] + 1; - if (icoord1[ch] > (int) size - 1) - icoord1[ch] = size - 1; - w[ch] = FRAC(u); - } - break; - default: - assert(0); - } -} - - -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = 
PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = ( sc / ma + 1.0F ) * 0.5F; - *newT = ( tc / ma + 1.0F ) * 0.5F; - - return face; -} - - -/** - * Examine the quad's texture coordinates to compute the partial - * derivatives w.r.t X and Y, then compute lambda (level of detail). - * - * This is only done for fragment shaders, not vertex shaders. - */ -static float -compute_lambda(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - float rho, lambda; - - if (samp->processor == TGSI_PROCESSOR_VERTEX) - return lodbias; - - assert(sampler->normalized_coords); - - assert(s); - { - float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; - float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * texture->width0; - } - if (t) { - float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; - float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; - float max; - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * texture->height0; - rho = MAX2(rho, max); - } - if (p) { - float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; - float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; - float max; - dpdx = fabsf(dpdx); - dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * texture->depth0; - rho = MAX2(rho, max); - } - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; -} - - -/** - * Do several things here: - * 1. Compute lambda from the texcoords, if needed - * 2. Determine if we're minifying or magnifying - * 3. If minifying, choose mipmap levels - * 4. 
Return image filter to use within mipmap images - * \param level0 Returns first mipmap level to sample from - * \param level1 Returns second mipmap level to sample from - * \param levelBlend Returns blend factor between levels, in [0,1] - * \param imgFilter Returns either the min or mag filter, depending on lambda - */ -static void -choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - unsigned *level0, unsigned *level1, float *levelBlend, - unsigned *imgFilter) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->min_img_filter != sampler->mag_img_filter) { - /* non-mipmapped texture, but still need to determine if doing - * minification or magnification. 
- */ - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - if (lambda <= 0.0) { - *imgFilter = sampler->mag_img_filter; - } - else { - *imgFilter = sampler->min_img_filter; - } - } - else { - *imgFilter = sampler->mag_img_filter; - } - } - else { - float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - - if (lambda <= 0.0) { /* XXX threshold depends on the filter */ - /* magnifying */ - *imgFilter = sampler->mag_img_filter; - *level0 = *level1 = 0; - } - else { - /* minifying */ - *imgFilter = sampler->min_img_filter; - - /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { - /* Nearest mipmap level */ - const int lvl = (int) (lambda + 0.5); - *level0 = - *level1 = CLAMP(lvl, 0, (int) texture->last_level); - } - else { - /* Linear interpolation between mipmap levels */ - const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); - *levelBlend = FRAC(lambda); /* blending weight between levels */ - } - } - } -} - - -/** - * Get a texel from a texture, using the texture tile cache. - * - * \param face the cube face in 0..5 - * \param level the mipmap level - * \param x the x coord of texel within 2D image - * \param y the y coord of texel within 2D image - * \param z which slice of a 3D texture - * \param rgba the quad to put the texel/color into - * \param j which element of the rgba quad to write to - * - * XXX maybe move this into lp_tile_cache.c and merge with the - * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... 
- */ -static void -get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, - const uint8_t *out[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - out[0] = &tile->color[y ][x ][0]; - out[1] = &tile->color[y ][x+1][0]; - out[2] = &tile->color[y+1][x ][0]; - out[3] = &tile->color[y+1][x+1][0]; -} - -static INLINE const uint8_t * -get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - - const struct llvmpipe_cached_tex_tile *tile - = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, 0, face, level)); - - y %= TEX_TILE_SIZE; - x %= TEX_TILE_SIZE; - - return &tile->color[y][x][0]; -} - - -static void -get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, - int x0, int y0, - int x1, int y1, - const uint8_t *out[4]) -{ - unsigned i; - - for (i = 0; i < 4; i++) { - unsigned tx = (i & 1) ? x1 : x0; - unsigned ty = (i >> 1) ? 
y1 : y0; - - out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty ); - } -} - -static void -get_texel(const struct tgsi_sampler *tgsi_sampler, - unsigned face, unsigned level, int x, int y, int z, - float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - if (x < 0 || x >= (int) u_minify(texture->width0, level) || - y < 0 || y >= (int) u_minify(texture->height0, level) || - z < 0 || z >= (int) u_minify(texture->depth0, level)) { - rgba[0][j] = sampler->border_color[0]; - rgba[1][j] = sampler->border_color[1]; - rgba[2][j] = sampler->border_color[2]; - rgba[3][j] = sampler->border_color[3]; - } - else { - const unsigned tx = x % TEX_TILE_SIZE; - const unsigned ty = y % TEX_TILE_SIZE; - const struct llvmpipe_cached_tex_tile *tile; - - tile = lp_get_cached_tex_tile(samp->cache, - tex_tile_address(x, y, z, face, level)); - - rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]); - rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]); - rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]); - rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]); - if (0) - { - debug_printf("Get texel %f %f %f %f from %s\n", - rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(texture->format)); - } - } -} - - -/** - * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' - * When we sampled the depth texture, the depth value was put into all - * RGBA channels. We look at the red channel here. 
- * \param rgba quad of (depth) texel values - * \param p texture 'P' components for four pixels in quad - * \param j which pixel in the quad to test [0..3] - */ -static INLINE void -shadow_compare(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE], - uint j) -{ - int k; - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k = p[j] < rgba[0][j]; - break; - case PIPE_FUNC_LEQUAL: - k = p[j] <= rgba[0][j]; - break; - case PIPE_FUNC_GREATER: - k = p[j] > rgba[0][j]; - break; - case PIPE_FUNC_GEQUAL: - k = p[j] >= rgba[0][j]; - break; - case PIPE_FUNC_EQUAL: - k = p[j] == rgba[0][j]; - break; - case PIPE_FUNC_NOTEQUAL: - k = p[j] != rgba[0][j]; - break; - case PIPE_FUNC_ALWAYS: - k = 1; - break; - case PIPE_FUNC_NEVER: - k = 0; - break; - default: - k = 0; - assert(0); - break; - } - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; - rgba[3][j] = 1.0F; -} - - -/** - * As above, but do four z/texture comparisons. - */ -static INLINE void -shadow_compare4(const struct pipe_sampler_state *sampler, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const float p[QUAD_SIZE]) -{ - int j, k0, k1, k2, k3; - float val; - - /* compare four texcoords vs. 
four texture samples */ - switch (sampler->compare_func) { - case PIPE_FUNC_LESS: - k0 = p[0] < rgba[0][0]; - k1 = p[1] < rgba[0][1]; - k2 = p[2] < rgba[0][2]; - k3 = p[3] < rgba[0][3]; - break; - case PIPE_FUNC_LEQUAL: - k0 = p[0] <= rgba[0][0]; - k1 = p[1] <= rgba[0][1]; - k2 = p[2] <= rgba[0][2]; - k3 = p[3] <= rgba[0][3]; - break; - case PIPE_FUNC_GREATER: - k0 = p[0] > rgba[0][0]; - k1 = p[1] > rgba[0][1]; - k2 = p[2] > rgba[0][2]; - k3 = p[3] > rgba[0][3]; - break; - case PIPE_FUNC_GEQUAL: - k0 = p[0] >= rgba[0][0]; - k1 = p[1] >= rgba[0][1]; - k2 = p[2] >= rgba[0][2]; - k3 = p[3] >= rgba[0][3]; - break; - case PIPE_FUNC_EQUAL: - k0 = p[0] == rgba[0][0]; - k1 = p[1] == rgba[0][1]; - k2 = p[2] == rgba[0][2]; - k3 = p[3] == rgba[0][3]; - break; - case PIPE_FUNC_NOTEQUAL: - k0 = p[0] != rgba[0][0]; - k1 = p[1] != rgba[0][1]; - k2 = p[2] != rgba[0][2]; - k3 = p[3] != rgba[0][3]; - break; - case PIPE_FUNC_ALWAYS: - k0 = k1 = k2 = k3 = 1; - break; - case PIPE_FUNC_NEVER: - k0 = k1 = k2 = k3 = 0; - break; - default: - k0 = k1 = k2 = k3 = 0; - assert(0); - break; - } - - /* convert four pass/fail values to an intensity in [0,1] */ - val = 0.25F * (k0 + k1 + k2 + k3); - - /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ - for (j = 0; j < 4; j++) { - rgba[0][j] = rgba[1][j] = rgba[2][j] = val; - rgba[3][j] = 1.0F; - } -} - - - -static void -lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */ - unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */ - - for (j 
= 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot - 0.5F; - float v = t[j] * ypot - 0.5F; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - float xw = u - (float)uflr; - float yw = v - (float)vflr; - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *tx[4]; - - - /* Can we fetch all four at once: - */ - if (x0 < xmax && y0 < ymax) - { - get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx); - } - else - { - unsigned x1 = (x0 + 1) & (xpot - 1); - unsigned y1 = (y0 + 1) & (ypot - 1); - get_texel_quad_2d_mt(tgsi_sampler, 0, level, - x0, y0, x1, y1, tx); - } - - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw, yw, - ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]), - ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c])); - } - } -} - - -static void -lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; - unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int uflr = util_ifloor(u); - int vflr = util_ifloor(v); - - int x0 = uflr & (xpot - 1); - int y0 = vflr & (ypot - 1); - - const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - unsigned j; - unsigned level = samp->level; 
- unsigned xpot = 1 << (samp->xpot - level); - unsigned ypot = 1 << (samp->ypot - level); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - - float u = s[j] * xpot; - float v = t[j] * ypot; - - int x0, y0; - const uint8_t *out; - - x0 = util_ifloor(u); - if (x0 < 0) - x0 = 0; - else if (x0 > xpot - 1) - x0 = xpot - 1; - - y0 = util_ifloor(v); - if (y0 < 0) - y0 = 0; - else if (y0 > ypot - 1) - y0 = ypot - 1; - - out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0); - - for (c = 0; c < 4; c++) { - rgba[c][j] = ubyte_to_float(out[c]); - } - } -} - - -static void -lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - int level0; - float lambda; - - lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias); - level0 = (int)lambda; - - if (lambda < 0.0) { - samp->level = 0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else if (level0 >= texture->last_level) { - samp->level = texture->last_level; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba ); - } - else { - float levelBlend = lambda - level0; - float rgba0[4][4]; - float rgba1[4][4]; - int c,j; - - samp->level = level0; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba0 ); - - samp->level = level0+1; - lp_get_samples_2d_linear_repeat_POT( tgsi_sampler, - s, t, p, 0, rgba1 ); - - for (j = 0; j < QUAD_SIZE; j++) { - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]); - } - } - } -} - -/** - * Common code for sampling 1D/2D/cube textures. - * Could probably extend for 3D... 
- */ -static void -lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE], - const unsigned faces[4]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend = 0.0f; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, - rgba2, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(sampler, rgba2, p, j); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, 
faces[j], level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], - tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - - /* XXX: This is incorrect -- will often end up with (x0 - * == x1 && y0 == y1), meaning that we fetch the same - * texel four times and linearly interpolate between - * identical values. The correct approach would be to - * call linear_texcoord again for the second level. - */ - x0[j] /= 2; - y0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare4(sampler, tx, p); - } - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static INLINE void -lp_get_samples_1d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - static const float tzero[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, tzero, NULL, - lodbias, rgba, 
faces); -} - - -static INLINE void -lp_get_samples_2d(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - static const unsigned faces[4] = {0, 0, 0, 0}; - lp_get_samples_2d_common(sampler, s, t, p, - lodbias, rgba, faces); -} - - -static INLINE void -lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - /* get/map pipe_surfaces corresponding to 3D tex slices */ - unsigned level0, level1, j, imgFilter; - int width, height, depth; - float levelBlend = 0.0f; - const uint face = 0; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - assert(sampler->normalized_coords); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - depth = u_minify(texture->depth0, level0); - - assert(width > 0); - assert(height > 0); - assert(depth > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4], z[4]; - nearest_texcoord_4(sampler->wrap_s, s, width, x); - nearest_texcoord_4(sampler->wrap_t, t, height, y); - nearest_texcoord_4(sampler->wrap_r, p, depth, z); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x[j] /= 2; - y[j] /= 2; - z[j] /= 2; - get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); - } - } - } - } - break; - case 
PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; - float xw[4], yw[4], zw[4]; /* interpolation weights */ - linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); - linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); - - for (j = 0; j < QUAD_SIZE; j++) { - int c; - float tx0[4][4], tx1[4][4]; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0[j] /= 2; - y0[j] /= 2; - z0[j] /= 2; - x1[j] /= 2; - y1[j] /= 2; - z1[j] /= 2; - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); - get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); - get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); - get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); - get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); - - /* interpolate R, G, B, A */ - for (c = 0; c < 4; c++) { - 
rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], - tx0[c][0], tx0[c][1], - tx0[c][2], tx0[c][3], - tx1[c][0], tx1[c][1], - tx1[c][2], tx1[c][3]); - } - - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); - } - } - } - } - break; - default: - assert(0); - } -} - - -static void -lp_get_samples_cube(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - unsigned faces[QUAD_SIZE], j; - float ssss[4], tttt[4]; - for (j = 0; j < QUAD_SIZE; j++) { - faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); - } - lp_get_samples_2d_common(sampler, ssss, tttt, NULL, - lodbias, rgba, faces); -} - - -static void -lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - const uint face = 0; - unsigned level0, level1, j, imgFilter; - int width, height; - float levelBlend; - - choose_mipmap_levels(tgsi_sampler, s, t, p, - lodbias, - &level0, &level1, &levelBlend, &imgFilter); - - /* texture RECTS cannot be mipmapped */ - assert(level0 == level1); - - width = u_minify(texture->width0, level0); - height = u_minify(texture->height0, level0); - - assert(width > 0); - - switch (imgFilter) { - case PIPE_TEX_FILTER_NEAREST: - { - int x[4], y[4]; - nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); - nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); - for (j = 0; j < QUAD_SIZE; j++) { - get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(sampler, rgba, p, j); - } - } 
- } - break; - case PIPE_TEX_FILTER_LINEAR: - case PIPE_TEX_FILTER_ANISO: - { - int x0[4], y0[4], x1[4], y1[4]; - float xw[4], yw[4]; /* weights */ - linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); - linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4]; /* texels */ - int c; - get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); - get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); - get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); - get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); - if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare4(sampler, tx, p); - } - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(xw[j], yw[j], - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - } - } - break; - default: - assert(0); - } -} - - -/** - * Error condition handler - */ -static INLINE void -lp_get_samples_null(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - int i,j; - - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - rgba[i][j] = 1.0; -} - -/** - * Called via tgsi_sampler::get_samples() when using a sampler for the - * first time. Determine the actual sampler function, link it in and - * call it. - */ -void -lp_get_samples(struct tgsi_sampler *tgsi_sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]) -{ - struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler); - const struct pipe_texture *texture = samp->texture; - const struct pipe_sampler_state *sampler = samp->sampler; - - /* Default to the 'undefined' case: - */ - tgsi_sampler->get_samples = lp_get_samples_null; - - if (!texture) { - assert(0); /* is this legal?? 
*/ - goto out; - } - - if (!sampler->normalized_coords) { - assert (texture->target == PIPE_TEXTURE_2D); - tgsi_sampler->get_samples = lp_get_samples_rect; - goto out; - } - - switch (texture->target) { - case PIPE_TEXTURE_1D: - tgsi_sampler->get_samples = lp_get_samples_1d; - break; - case PIPE_TEXTURE_2D: - tgsi_sampler->get_samples = lp_get_samples_2d; - break; - case PIPE_TEXTURE_3D: - tgsi_sampler->get_samples = lp_get_samples_3d; - break; - case PIPE_TEXTURE_CUBE: - tgsi_sampler->get_samples = lp_get_samples_cube; - break; - default: - assert(0); - break; - } - - /* Do this elsewhere: - */ - samp->xpot = util_unsigned_logbase2( samp->texture->width0 ); - samp->ypot = util_unsigned_logbase2( samp->texture->height0 ); - - /* Try to hook in a faster sampler. Ultimately we'll have to - * code-generate these. Luckily most of this looks like it is - * orthogonal state within the sampler. - */ - if (texture->target == PIPE_TEXTURE_2D && - sampler->min_img_filter == sampler->mag_img_filter && - sampler->wrap_s == sampler->wrap_t && - sampler->compare_mode == FALSE && - sampler->normalized_coords) - { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { - samp->level = CLAMP((int) sampler->min_lod, - 0, (int) texture->last_level); - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT; - break; - case PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT; - break; - default: - break; - } - } - else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) { - switch (sampler->min_img_filter) { - case PIPE_TEX_FILTER_NEAREST: - tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT; - break; - default: - break; - } - } - } - else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) { - switch (sampler->min_img_filter) { - case 
PIPE_TEX_FILTER_LINEAR: - tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT; - break; - default: - break; - } - } - } - } - else if (0) { - _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n", - texture->target, PIPE_TEXTURE_2D, - sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE, - sampler->min_img_filter, sampler->mag_img_filter, - sampler->wrap_s, sampler->wrap_t, - sampler->compare_mode, FALSE, - sampler->normalized_coords, TRUE); - } - -out: - tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba ); -} - - -void PIPE_CDECL -lp_fetch_texel_soa( struct tgsi_sampler **samplers, - uint32_t unit, - float *store ) -{ - struct tgsi_sampler *sampler = samplers[unit]; - -#if 0 - uint j; - - debug_printf("%s sampler: %p (%p) store: %p\n", - __FUNCTION__, - sampler, *sampler, - store ); - - debug_printf("lodbias %f\n", store[12]); - - for (j = 0; j < 4; j++) - debug_printf("sample %d texcoord %f %f\n", - j, - store[0+j], - store[4+j]); -#endif - - { - float rgba[NUM_CHANNELS][QUAD_SIZE]; - sampler->get_samples(sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); - memcpy(store, rgba, sizeof rgba); - } - -#if 0 - for (j = 0; j < 4; j++) - debug_printf("sample %d result %f %f %f %f\n", - j, - store[0+j], - store[4+j], - store[8+j], - store[12+j]); -#endif -} - - -#include "lp_bld_type.h" -#include "lp_bld_intr.h" -#include "lp_bld_tgsi.h" - - -struct lp_c_sampler_soa -{ - struct lp_build_sampler_soa base; - - LLVMValueRef context_ptr; - - LLVMValueRef samplers_ptr; - - /** Coords/texels store */ - LLVMValueRef store_ptr; -}; - - -static void -lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) -{ - FREE(sampler); -} - - -static void -lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler, - LLVMBuilderRef builder, - struct lp_type type, - unsigned unit, - unsigned num_coords, - const LLVMValueRef *coords, - LLVMValueRef lodbias, - 
LLVMValueRef *texel) -{ - struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler; - LLVMTypeRef vec_type = LLVMTypeOf(coords[0]); - LLVMValueRef args[3]; - unsigned i; - - if(!sampler->samplers_ptr) - sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr); - - if(!sampler->store_ptr) - sampler->store_ptr = LLVMBuildArrayAlloca(builder, - vec_type, - LLVMConstInt(LLVMInt32Type(), 4, 0), - "texel_store"); - - for (i = 0; i < num_coords; i++) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - LLVMBuildStore(builder, coords[i], coord_ptr); - } - - args[0] = sampler->samplers_ptr; - args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0); - args[2] = sampler->store_ptr; - - lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3); - - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, ""); - texel[i] = LLVMBuildLoad(builder, texel_ptr, ""); - } -} - - -struct lp_build_sampler_soa * -lp_c_sampler_soa_create(LLVMValueRef context_ptr) -{ - struct lp_c_sampler_soa *sampler; - - sampler = CALLOC_STRUCT(lp_c_sampler_soa); - if(!sampler) - return NULL; - - sampler->base.destroy = lp_c_sampler_soa_destroy; - sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel; - sampler->context_ptr = context_ptr; - - return &sampler->base; -} - diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h deleted file mode 100644 index 9c235080a5..0000000000 --- a/src/gallium/drivers/nouveau/nouveau_push.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef __NOUVEAU_PUSH_H__ -#define __NOUVEAU_PUSH_H__ - -#include "nouveau/nouveau_winsys.h" - -#ifndef NOUVEAU_PUSH_CONTEXT -#error undefined push context -#endif - -#define OUT_RING(data) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - 
(*pc->base.channel->pushbuf->cur++) = (data); \ -} while(0) - -#define OUT_RINGp(src,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4); \ - pc->base.channel->pushbuf->cur += (size); \ -} while(0) - -#define OUT_RINGf(data) do { \ - union { float v; uint32_t u; } c; \ - c.v = (data); \ - OUT_RING(c.u); \ -} while(0) - -#define BEGIN_RING(obj,mthd,size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#define BEGIN_RING_NI(obj,mthd,size) do { \ - BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ -} while(0) - -static inline void -DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence) -{ - nouveau_pushbuf_flush(chan, 0); - if (fence) - *fence = NULL; -} - -#define FIRE_RING(fence) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - DO_FIRE_RING(pc->base.channel, fence); \ -} while(0) - -#define OUT_RELOC(bo,data,flags,vor,tor) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \ - (data), 0, (flags), (vor), (tor)); \ -} while(0) - -/* Raw data + flags depending on FB/TT buffer */ -#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ - OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ -} while(0) - -/* FB/TT object handle */ -#define OUT_RELOCo(bo,flags) do { \ - OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ - pc->base.channel->vram->handle, \ - pc->base.channel->gart->handle); \ -} while(0) - -/* Low 32-bits of offset */ -#define OUT_RELOCl(bo,delta,flags) do { \ - OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ -} while(0) - -/* High 32-bits of offset */ -#define OUT_RELOCh(bo,delta,flags) do { \ - OUT_RELOC((bo), 
(delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ -} while(0) - -/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ -#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \ - NOUVEAU_PUSH_CONTEXT(pc); \ - struct nouveau_channel *chan = pc->base.channel; \ - if (chan->pushbuf->remaining < ((size) + 1)) \ - nouveau_pushbuf_flush(chan, ((size) + 1)); \ - OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \ - (flags), 0, 0); \ - chan->pushbuf->remaining -= ((size) + 1); \ -} while(0) - -#endif diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 0437af3725..7ebc94ed6c 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage)); if (ret) { debug_printf("map failed: %d\n", ret); @@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned offset, unsigned length, unsigned usage) { struct nouveau_bo *bo = nouveau_bo(pb); + struct nouveau_screen *nscreen = nouveau_screen(pscreen); uint32_t flags = nouveau_screen_map_flags(usage); int ret; + if (nscreen->pre_pipebuffer_map_callback) { + ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage); + if (ret) { + debug_printf("pre_pipebuffer_map_callback failed %d\n", + ret); + return NULL; + } + } + ret = nouveau_bo_map_range(bo, offset, length, flags); if (ret) { + nouveau_bo_unmap(bo); if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY) 
debug_printf("map_range failed: %d\n", ret); return NULL; diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index ebfc67ad1c..a7927d88df 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -5,6 +5,9 @@ struct nouveau_screen { struct pipe_screen base; struct nouveau_device *device; struct nouveau_channel *channel; + + int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen, + struct pipe_buffer *pb, unsigned usage); }; static inline struct nouveau_screen * diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h index 9aee9e4956..e844f6abb3 100644 --- a/src/gallium/drivers/nouveau/nouveau_stateobj.h +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -3,41 +3,95 @@ #include "util/u_debug.h" +#ifdef DEBUG +#define DEBUG_NOUVEAU_STATEOBJ +#endif /* DEBUG */ + struct nouveau_stateobj_reloc { struct nouveau_bo *bo; - unsigned offset; - unsigned packet; + struct nouveau_grobj *gr; + uint32_t push_offset; + uint32_t mthd; - unsigned data; + uint32_t data; unsigned flags; unsigned vor; unsigned tor; }; +struct nouveau_stateobj_start { + struct nouveau_grobj *gr; + uint32_t mthd; + uint32_t size; + unsigned offset; +}; + struct nouveau_stateobj { struct pipe_reference reference; - unsigned *push; + struct nouveau_stateobj_start *start; struct nouveau_stateobj_reloc *reloc; - unsigned *cur; - unsigned cur_packet; + /* Common memory pool for data. 
*/ + uint32_t *pool; + unsigned pool_cur; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + unsigned start_alloc; + unsigned reloc_alloc; + unsigned pool_alloc; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + unsigned total; /* includes begin_ring */ + unsigned cur; /* excludes begin_ring, offset from "cur_start" */ + unsigned cur_start; unsigned cur_reloc; }; +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ + unsigned i, nr, total = 0; + + for (i = 0; i < so->cur_start; i++) { + if (so->start[i].gr->subc > -1) + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | (so->start[i].gr->subc << 13) + | so->start[i].mthd); + else + debug_printf("+0x%04x: 0x%08x\n", total++, + (so->start[i].size << 18) | so->start[i].mthd); + for (nr = 0; nr < so->start[i].size; nr++, total++) + debug_printf("+0x%04x: 0x%08x\n", total, + so->pool[so->start[i].offset + nr]); + } +} + static INLINE struct nouveau_stateobj * -so_new(unsigned push, unsigned reloc) +so_new(unsigned start, unsigned push, unsigned reloc) { struct nouveau_stateobj *so; so = MALLOC(sizeof(struct nouveau_stateobj)); pipe_reference_init(&so->reference, 1); - so->push = MALLOC(sizeof(unsigned) * push); - so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + so->total = so->cur = so->cur_start = so->cur_reloc = 0; - so->cur = so->push; - so->cur_reloc = so->cur_packet = 0; +#ifdef DEBUG_NOUVEAU_STATEOBJ + so->start_alloc = start; + so->reloc_alloc = reloc; + so->pool_alloc = push; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start)); + so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc)); + so->pool = MALLOC(push * sizeof(uint32_t)); + so->pool_cur = 0; + + if (!so->start || !so->reloc || !so->pool) { + debug_printf("malloc failed\n"); + assert(0); + } return so; } @@ -48,63 +102,128 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) struct nouveau_stateobj *so = *pso; int i; - if 
(pipe_reference(&(*pso)->reference, &ref->reference)) { - free(so->push); + if (pipe_reference(&(*pso)->reference, &ref->reference)) { + FREE(so->start); for (i = 0; i < so->cur_reloc; i++) nouveau_bo_ref(NULL, &so->reloc[i].bo); - free(so->reloc); - free(so); + FREE(so->reloc); + FREE(so->pool); + FREE(so); } *pso = ref; } static INLINE void -so_data(struct nouveau_stateobj *so, unsigned data) +so_data(struct nouveau_stateobj *so, uint32_t data) { - (*so->cur++) = (data); - so->cur_packet += 4; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur >= so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data; } static INLINE void -so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size) { - so->cur_packet += (4 * size); +#ifdef DEBUG_NOUVEAU_STATEOBJ + if ((so->cur + size) > so->start[so->cur_start - 1].size) { + debug_printf("exceeding specified size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + while (size--) - (*so->cur++) = (*data++); + so->pool[so->start[so->cur_start - 1].offset + so->cur++] = + *data++; } static INLINE void so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, unsigned mthd, unsigned size) { - so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); - so_data(so, (gr->subc << 13) | (size << 18) | mthd); + struct nouveau_stateobj_start *start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start_alloc <= so->cur_start) { + debug_printf("exceeding num_start size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + start = so->start; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) { + debug_printf("previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + so->start = start; + start[so->cur_start].gr 
= gr; + start[so->cur_start].mthd = mthd; + start[so->cur_start].size = size; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->pool_alloc < (size + so->pool_cur)) { + debug_printf("exceeding num_pool size\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + start[so->cur_start].offset = so->pool_cur; + so->pool_cur += size; + + so->cur_start++; + /* The 1 is for *this* begin_ring. */ + so->total += so->cur + 1; + so->cur = 0; } static INLINE void so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo, unsigned data, unsigned flags, unsigned vor, unsigned tor) { - struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; - - r->bo = NULL; - nouveau_bo_ref(bo, &r->bo); - r->offset = so->cur - so->push; - r->packet = so->cur_packet; - r->data = data; - r->flags = flags; - r->vor = vor; - r->tor = tor; + struct nouveau_stateobj_reloc *r; + +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->reloc_alloc <= so->cur_reloc) { + debug_printf("exceeding num_reloc size\n"); + assert(0); + } else +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + r = so->reloc; + + so->reloc = r; + r[so->cur_reloc].bo = NULL; + nouveau_bo_ref(bo, &(r[so->cur_reloc].bo)); + r[so->cur_reloc].gr = so->start[so->cur_start-1].gr; + r[so->cur_reloc].push_offset = so->total + so->cur; + r[so->cur_reloc].data = data; + r[so->cur_reloc].flags = flags; + r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd + + (so->cur << 2); + r[so->cur_reloc].vor = vor; + r[so->cur_reloc].tor = tor; + so_data(so, data); + so->cur_reloc++; } -static INLINE void -so_dump(struct nouveau_stateobj *so) +/* Determine if this buffer object is referenced by this state object. 
*/ +static INLINE boolean +so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo) { - unsigned i, nr = so->cur - so->push; + int i; + + for (i = 0; i < so->cur_reloc; i++) + if (so->reloc[i].bo == bo) + return true; - for (i = 0; i < nr; i++) - debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); + return false; } static INLINE void @@ -114,75 +233,93 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so) unsigned nr, i; int ret = 0; - nr = so->cur - so->push; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (so->start[so->cur_start - 1].size > so->cur) { + debug_printf("emit: previous so_method was not filled\n"); + assert(0); + } +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* We cannot update total in case we so_emit again. */ + nr = so->total + so->cur; + /* This will flush if we need space. * We don't actually need the marker. */ if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) { debug_printf("so_emit failed marker emit with error %d\n", ret); - return; + assert(0); + } + + /* Submit data. This will ensure proper binding of objects. 
*/ + for (i = 0; i < so->cur_start; i++) { + BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size); + OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size); } - pb->remaining -= nr; - memcpy(pb->cur, so->push, nr * 4); for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset, - r->bo, r->data, 0, r->flags, - r->vor, r->tor))) { + if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr + + r->push_offset, r->bo, r->data, + 0, r->flags, r->vor, r->tor))) { debug_printf("so_emit failed reloc with error %d\n", ret); - goto out; + assert(0); } } -out: - pb->cur += nr; } static INLINE void so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so) { struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *gr = NULL; unsigned i; int ret = 0; if (!so) return; - i = so->cur_reloc << 1; - /* This will flush if we need space. - * We don't actually need the marker. - */ - if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) { - debug_printf("so_emit_reloc_markers failed marker emit with" \ - "error %d\n", ret); - return; - } - pb->remaining -= i; - + /* If we need to flush in flush notify, then we have a problem anyway. 
*/ for (i = 0; i < so->cur_reloc; i++) { struct nouveau_stateobj_reloc *r = &so->reloc[i]; - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->packet, 0, - (r->flags & (NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | - NOUVEAU_BO_RDWR)) | - NOUVEAU_BO_DUMMY, 0, 0))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1); - return; +#ifdef DEBUG_NOUVEAU_STATEOBJ + if (r->mthd & 0x40000000) { + debug_printf("error: NI mthd 0x%08X\n", r->mthd); + continue; } - if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, - r->data, 0, - r->flags | NOUVEAU_BO_DUMMY, - r->vor, r->tor))) { - debug_printf("so_emit_reloc_markers failed reloc" \ - "with error %d\n", ret); - pb->remaining += ((so->cur_reloc - i) << 1) - 1; - return; +#endif /* DEBUG_NOUVEAU_STATEOBJ */ + + /* The object needs to be bound and the system must know the + * subchannel is being used. Otherwise it will discard it. + */ + if (gr != r->gr) { + BEGIN_RING(chan, r->gr, 0x100, 1); + OUT_RING(chan, 0); + gr = r->gr; + } + + /* Some relocs really don't like to be hammered, + * NOUVEAU_BO_DUMMY makes sure it only + * happens when needed. 
+ */ + ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) | + r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART + | NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); } + + ret = OUT_RELOC(chan, r->bo, r->data, r->flags | + NOUVEAU_BO_DUMMY, r->vor, r->tor); + if (ret) { + debug_printf("OUT_RELOC failed %d\n", ret); + assert(0); + } + + pb->remaining -= 2; } } diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 770733a4a1..edd96859cf 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv04->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -30,32 +34,36 @@ nv04_destroy(struct pipe_context *pipe) static boolean nv04_init_hwctx(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); - OUT_RING(0); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(chan, 0); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(0x40182800); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, 0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // 
OUT_RING(0x24|(1<<6)|(1<<8)); - OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); - OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); - OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); - OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); + OUT_RING(chan, 0x120001a4); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(chan, 0x332213a1); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(chan, 0x11001010); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(chan, 0x0); +// BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); - OUT_RING(0xff000000); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(chan, 0xff000000); - FIRE_RING (NULL); + FIRE_RING (chan); return TRUE; } diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h index 55326c787a..fe3b527423 100644 --- a/src/gallium/drivers/nv04/nv04_context.h +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv04_screen *ctx = nv04->screen -#include "nouveau/nouveau_push.h" - #include "nv04_state.h" #define NOUVEAU_ERR(fmt, args...) 
\ @@ -141,9 +137,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04); extern void nv04_state_tex_update(struct nv04_context *nv04); /* nv04_vbo.c */ -extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv04_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv04_draw_elements( struct pipe_context *pipe, +extern void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index 25395edfd7..0b795ea243 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RINGp(buffer + VERTEX_SIZE * v4,8); - OUT_RINGp(buffer + VERTEX_SIZE * v5,8); - OUT_RING(0xFEDCBA); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8); + OUT_RING(chan, 0xFEDCBA); } static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* 
buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RING(0xFED); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RING(chan, 0xFED); } static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); - OUT_RINGp(buffer + VERTEX_SIZE * v0,8); - OUT_RINGp(buffer + VERTEX_SIZE * v1,8); - OUT_RINGp(buffer + VERTEX_SIZE * v2,8); - OUT_RINGp(buffer + VERTEX_SIZE * v3,8); - OUT_RING(0xFECEDC); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8); + OUT_RING(chan, 0xFECEDC); } static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) @@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con { const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + 
struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; for(i = 0; i<nr_indices; i+=14) @@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8); for(j = 0; j<numvert; j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) - OUT_RING(striptbl[j]); + OUT_RING(chan, striptbl[j]); if (numtri%2) - OUT_RING(striptbl[numtri/2]&0xFFF); + OUT_RING(chan, striptbl[numtri/2]&0xFFF); } } @@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const { const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; unsigned char* buffer = render->buffer; - struct nv04_context* nv04 = render->nv04; + struct nv04_context *nv04 = render->nv04; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; int i,j; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); - OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) { @@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), 
numvert*8); for(j=0;j<numvert;j++) - OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) - OUT_RING(fantbl[j]); + OUT_RING(chan, fantbl[j]); if (numtri%2) - OUT_RING(fantbl[numtri/2]&0xFFF); + OUT_RING(chan, fantbl[numtri/2]&0xFFF); } } diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index bd98ae091f..b8d6dc560f 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f) static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, control); } static void nv04_emit_blend(struct nv04_context* nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; uint32_t blend; blend=0x4; // texture MODULATE_ALPHA @@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04) blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); - OUT_RING(blend); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(chan, blend); } static void nv04_emit_sampler(struct nv04_context *nv04, int unit) { struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = 
&nv04mt->base; - - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING(nv04->sampler[unit]->filter); + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(chan, nv04->sampler[unit]->filter); } static void nv04_state_emit_framebuffer(struct nv04_context* nv04) @@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv04_miptree *nv04mt = 0; + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; + struct nouveau_bo *bo; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) assert(0); } - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); - OUT_RING(rt_format); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(chan, rt_format); nv04mt = (struct nv04_miptree *)rt->base.texture; + bo = nouveau_bo(nv04mt->buffer); /* FIXME pitches have to be aligned ! 
*/ - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt->pitch|(zeta->pitch<<16)); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt->pitch|(zeta->pitch<<16)); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { nv04mt = (struct nv04_miptree *)zeta->base.texture; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } void nv04_emit_hw_state(struct nv04_context *nv04) { + struct nv04_screen *screen = nv04->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *fahrenheit = screen->fahrenheit; + struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d; int i; if (nv04->dirty & NV04_NEW_VERTPROG) { @@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); - OUT_RING(nv04->dsa->control); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(chan, nv04->dsa->control); } if (nv04->dirty & NV04_NEW_BLEND) { @@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04) unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch; unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch; - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); - OUT_RING(rt_pitch|(zeta_pitch<<16)); - OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(chan, rt_pitch|(zeta_pitch<<16)); + OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if 
(nv04->zeta) { - BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } /* Texture images */ @@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); - OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer); + BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } } diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index 099ab10043..3484771814 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv04_draw_elements( struct pipe_context *pipe, +void nv04_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv04_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv04_draw_arrays( 
struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { printf("coucou in draw arrays\n"); - return nv04_draw_elements(pipe, NULL, 0, prim, start, count); + nv04_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 0dadeb03dd..1ecb73d06e 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv10->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,225 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10) { struct nv10_screen *screen = nv10->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; float projectionmatrix[16]; - BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); - BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); + BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, celsius, 
NV10TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0x7ff<<16)|0x800); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0x7ff<<16)|0x800); for (i=1;i<8;i++) { - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, 0x290, 1); - OUT_RING ((0x10<<16)|1); - BEGIN_RING(celsius, 0x3f4, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 0x290, 1); + OUT_RING (chan, (0x10<<16)|1); + BEGIN_RING(chan, celsius, 0x3f4, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); if (nv10->screen->celsius->grclass != NV10TCL) { /* For nv11, nv17 */ - BEGIN_RING(celsius, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, celsius, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_NOP, 1); + OUT_RING (chan, 0); /* Set state */ - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (0x207); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); - OUT_RING (0); - OUT_RING (0); - - BEGIN_RING(celsius, 
NV10TCL_RC_IN_ALPHA(0), 12); - OUT_RING (0x30141010); - OUT_RING (0); - OUT_RING (0x20040000); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0x00000c00); - OUT_RING (0x18000000); - OUT_RING (0x300e0300); - OUT_RING (0x0c091c80); - - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); - OUT_RING (1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); - OUT_RING (1); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0x8006); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); - OUT_RING (0xff); - OUT_RING (0x207); - OUT_RING (0); - OUT_RING (0xff); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1e00); - OUT_RING (0x1d01); - BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (0x201); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 
2); - OUT_RING (0x1b02); - OUT_RING (0x1b02); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (0x405); - OUT_RING (0x901); - BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + + BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (chan, 0x30141010); + OUT_RING (chan, 0); + OUT_RING (chan, 0x20040000); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0x18000000); + OUT_RING (chan, 0x300e0300); + OUT_RING (chan, 0x0c091c80); + + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0x8006); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x207); + OUT_RING (chan, 0); + OUT_RING (chan, 0xff); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1e00); + OUT_RING (chan, 0x1d01); + BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + 
OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, 0x201); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, 0x1b02); + OUT_RING (chan, 0x1b02); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, 0x405); + OUT_RING (chan, 0x901); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8); for (i=0;i<8;i++) { - OUT_RING (0); + OUT_RING (chan, 0); } - BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RING (0x3fc00000); /* -1.50 */ - OUT_RING (0xbdb8aa0a); /* -0.09 */ - OUT_RING (0); /* 0.00 */ + BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (chan, 0x3fc00000); /* -1.50 */ + OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */ + OUT_RING (chan, 0); /* 0.00 */ - BEGIN_RING(celsius, NV10TCL_NOP, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, 
NV10TCL_NOP, 1); + OUT_RING (chan, 0); - BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); - OUT_RING (0x802); - OUT_RING (2); + BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (chan, 0x802); + OUT_RING (chan, 2); /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when * using texturing, except when using the texture matrix */ - BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); - OUT_RING (6); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (0x01010101); + BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (chan, 6); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x01010101); /* Set vertex component */ - BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); - OUT_RING (0); - OUT_RING (0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); - OUT_RINGf (0.0); - BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, 
NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (chan, 0.0); + BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); memset(projectionmatrix, 0, sizeof(projectionmatrix)); - BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 1.0; projectionmatrix[3*4+3] = 1.0; for (i=0;i<16;i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); - OUT_RING (0.0); - OUT_RINGf (16777216.0); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (chan, 0.0); + OUT_RINGf (chan, 16777216.0); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (-2048.0); - OUT_RINGf (-2048.0); - OUT_RINGf (16777215.0 * 0.5); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, -2048.0); + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RING (chan, 0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h index 36a6aa7a74..ab4b825487 100644 --- a/src/gallium/drivers/nv10/nv10_context.h +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv10_screen *ctx = nv10->screen -#include "nouveau/nouveau_push.h" - #include "nv10_state.h" #define NOUVEAU_ERR(fmt, args...) 
\ @@ -144,9 +140,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10); extern void nv10_state_tex_update(struct nv10_context *nv10); /* nv10_vbo.c */ -extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv10_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv10_draw_elements( struct pipe_context *pipe, +extern void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c index 906fdfeeb9..c1f7ccb9ab 100644 --- a/src/gallium/drivers/nv10/nv10_fragtex.c +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; struct pipe_texture *pt = &nv10mt->base; struct nv10_texture_format *tf; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; uint32_t txf, txs, txp; tf = nv10_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit) return; } - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | 
NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10) { #if 0 struct nv10_fragment_program *fp = nv10->fragprog.active; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; unsigned samplers, unit; samplers = nv10->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv10->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index 7ba9777a22..c5dbe43dbc 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -67,12 +67,15 @@ struct nv10_vbuf_render { void nv10_vtxbuf_bind( struct nv10_context* nv10 ) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int i; for(i = 0; i < 8; i++) { - BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv10->vtxbuf*/); - BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv10->vtxbuf*/); + BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1); + OUT_RING(chan, 0/*XXX*/); } } @@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render, { struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); struct nv10_context *nv10 = nv10_render->nv10; + struct nv10_screen *screen = 
nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; int push, i; nv10_emit_hw_state(nv10); - BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv10_render->hwprim); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv10_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render, // XXX too big/small ? check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 6a39ddeaac..69a6dab866 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -180,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->celsius, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git 
a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 2577ab73b5..30a596ca60 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -4,25 +4,32 @@ static void nv10_state_emit_blend(struct nv10_context* nv10) { struct nv10_blend_state *b = nv10->blend; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); - OUT_RING (b->b_enable); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (chan, b->b_enable); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv10_state_emit_blend_color(struct nv10_context* nv10) { struct pipe_blend_color *c = nv10->blend_color; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10) static void nv10_state_emit_rast(struct nv10_context* nv10) { struct nv10_rasterizer_state *r = nv10->rast; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct 
nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); - BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv10_state_emit_dsa(struct nv10_context* nv10) { struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; - BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(celsius, 
NV10TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); #if 0 - BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING (d->alpha.func); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv10_state_emit_viewport(struct nv10_context* nv10) @@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) int colour_format = 0, zeta_format = 0; struct nv10_miptree *nv10mt = 0; + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; colour_format = fb->cbufs[0]->format; @@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) } if (zeta) { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1); + 
OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv10mt = (struct nv10_miptree *)rt->base.texture; @@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) nv10->zeta = nv10mt->buffer; } - BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); - OUT_RING (rt_format); - BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0 | 0x08000800); - OUT_RING (((h - 1) << 16) | 0 | 0x08000800); + BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); + OUT_RING (chan, rt_format); + BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (chan, ((h - 1) << 16) | 0 | 0x08000800); } static void nv10_vertex_layout(struct nv10_context *nv10) @@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10) void nv10_emit_hw_state(struct nv10_context *nv10) { + struct nv10_screen *screen = nv10->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *celsius = screen->celsius; + struct nouveau_bo *rt_bo; int i; if (nv10->dirty & NV10_NEW_VERTPROG) { @@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10) */ /* Render target */ + rt_bo = nouveau_bo(nv10->rt[0]); // XXX figre out who's who for NV10TCL_DMA_* and fill accordingly -// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); -// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv10->zeta) { + struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta); // XXX -// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 
1); -// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +// BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv10->zeta + lma_offset);*/ +/* BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv10->fp_samplers & (1 << i))) continue; - BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer); + BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv10->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, NV10TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 0d26141248..9180c72c9b 100644 --- 
a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean nv10_draw_elements( struct pipe_context *pipe, +void nv10_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -65,14 +65,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, pipe_buffer_unmap(pscreen, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } - - return TRUE; } -boolean nv10_draw_arrays( struct pipe_context *pipe, - unsigned prim, unsigned start, unsigned count) +void nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) { - return nv10_draw_elements(pipe, NULL, 0, prim, start, count); + nv10_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index 6a147a4159..5b80af2d22 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; draw_flush(nv20->draw); - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void @@ -31,348 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20) { struct nv20_screen *screen = nv20->screen; struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; float projectionmatrix[16]; - const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + const boolean is_nv25tcl = (kelvin->grclass == NV25TCL); - BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); - OUT_RING (screen->sync->handle); - BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); - 
OUT_RING (chan->vram->handle); - OUT_RING (chan->gart->handle); /* TEXTURE1 */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); - OUT_RING (chan->vram->handle); - OUT_RING (chan->vram->handle); /* ZETA */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (chan, screen->sync->handle); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->gart->handle); /* TEXTURE1 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); /* ZETA */ - BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); - OUT_RING (0); /* renouveau: beef0351, unique */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (chan, 0); /* renouveau: beef0351, unique */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING (0); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); - OUT_RING ((0xfff << 16) | 0x0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); - OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING (chan, (0xfff << 16) | 0x0); for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x17e0, 3); - OUT_RINGf (0.0); - OUT_RINGf (0.0); - OUT_RINGf (1.0); + BEGIN_RING(chan, kelvin, 0x17e0, 3); + OUT_RINGf 
(chan, 0.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); } else { - BEGIN_RING(kelvin, 0x1e68, 1); - OUT_RING (0x4b800000); /* 16777216.000000 */ - BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); - OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + BEGIN_RING(chan, kelvin, 0x1e68, 1); + OUT_RING (chan, 0x4b800000); /* 16777216.000000 */ + BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (chan, NV20TCL_TX_RCOMP_LEQUAL); } - BEGIN_RING(kelvin, 0x290, 1); - OUT_RING ((0x10 << 16) | 1); - BEGIN_RING(kelvin, 0x9fc, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x1d80, 1); - OUT_RING (1); - BEGIN_RING(kelvin, 0x9f8, 1); - OUT_RING (4); - BEGIN_RING(kelvin, 0x17ec, 3); - OUT_RINGf (0.0); - OUT_RINGf (1.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, 0x290, 1); + OUT_RING (chan, (0x10 << 16) | 1); + BEGIN_RING(chan, kelvin, 0x9fc, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x1d80, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, 0x9f8, 1); + OUT_RING (chan, 4); + BEGIN_RING(chan, kelvin, 0x17ec, 3); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 1.0); + OUT_RINGf (chan, 0.0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d88, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d88, 1); + OUT_RING (chan, 3); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); - OUT_RING (chan->vram->handle); - BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); - OUT_RING (chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (chan, chan->vram->handle); + BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (chan, chan->vram->handle); } - BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); - OUT_RING (0); /* renouveau: beef1e10 */ + BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (chan, 0); /* renouveau: beef1e10 */ - BEGIN_RING(kelvin, 0x1e98, 1); - 
OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1e98, 1); + OUT_RING (chan, 0); #if 0 if (is_nv25tcl) { - BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ - OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (chan, NvDmaFB); /* renouveau: beef0201 */ - BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); - OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (chan, NvDmaTT); /* renouveau: beef0202 */ } #endif - BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (chan, 0); - BEGIN_RING(kelvin, 0x120, 3); - OUT_RING (0); - OUT_RING (1); - OUT_RING (2); + BEGIN_RING(chan, kelvin, 0x120, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 2); /* error: ILLEGAL_MTHD, PROTECTION_FAULT - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); - OUT_RINGf (512.0); - OUT_RINGf (0.0); - OUT_RINGf (0.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 512.0); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x022c, 2); - OUT_RING (0x280); - OUT_RING (0x07d28000); + BEGIN_RING(chan, kelvin, 0x022c, 2); + OUT_RING (chan, 0x280); + OUT_RING (chan, 0x07d28000); } /* * illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c2c, 1); - OUT_RING (0); */ + BEGIN_RING(chan, NvSub3D, 0x1c2c, 1); + OUT_RING (chan, 0); */ if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1da4, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, 0x1da4, 1); + OUT_RING (chan, 0); } /* * crashes with illegal method, protection fault - BEGIN_RING(NvSub3D, 0x1c18, 1); - OUT_RING (0x200); */ + BEGIN_RING(chan, NvSub3D, 0x1c18, 1); + OUT_RING (chan, 0x200); */ - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); - OUT_RING ((0 << 16) | 0); 
- OUT_RING ((0 << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (chan, (0 << 16) | 0); + OUT_RING (chan, (0 << 16) | 0); /* *** Set state *** */ - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); - OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_ALPHA_FUNC_REF */ for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); - OUT_RING (0x30d410d0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); - OUT_RING (0x00011101); - BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); - OUT_RING (0x130e0300); - OUT_RING (0x0c091c80); - BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); - OUT_RING (0x00000c00); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); - OUT_RING (0x20c400c0); - OUT_RING (0); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); - OUT_RING (0); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); - OUT_RING (0x035125a0); - OUT_RING (0); - OUT_RING (0x40002000); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); - OUT_RING (0xffff0000); - - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - 
OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); - OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); - OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); - OUT_RING (0); /* NV20TCL_BLEND_COLOR */ - OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RING (0xff); - OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); - OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */ - OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ - OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); - OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); - - BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); - OUT_RING (0); - OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); - BEGIN_RING(kelvin, 0x17cc, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (chan, 0x30d410d0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (chan, 0x00011101); + BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (chan, 0x130e0300); + OUT_RING (chan, 0x0c091c80); + BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (chan, 0x00000c00); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (chan, 0x20c400c0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (chan, 0x035125a0); + OUT_RING (chan, 0); + OUT_RING (chan, 
0x40002000); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (chan, 0xffff0000); + + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (chan, NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (chan, NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (chan, 0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (chan, 0xff); + OUT_RING (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (chan, 0); /* NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (chan, 0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (chan, NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(chan, kelvin, 0x17cc, 1); + OUT_RING (chan, 0); if (is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 1); } - BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); - OUT_RING (0x00020000); - BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), + BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (chan, 0x00020000); + BEGIN_RING(chan, kelvin, 
NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { - OUT_RING(0xffffffff); + OUT_RING(chan, 0xffffffff); } - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ - OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (NV20TCL_DEPTH_FUNC_LESS); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); - OUT_RINGf (0.0); - OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (chan, 0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); if (!is_nv25tcl) { - BEGIN_RING(kelvin, 0x1d84, 1); - OUT_RING (3); + BEGIN_RING(chan, kelvin, 0x1d84, 1); + OUT_RING (chan, 3); } - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 
1); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); if (!is_nv25tcl) { - OUT_RING (8); + OUT_RING (chan, 8); } else { - OUT_RINGf (1.0); + OUT_RINGf (chan, 1.0); } if (!is_nv25tcl) { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.POINT_SMOOTH_ENABLE */ } else { - BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, 0x0a1c, 1); - OUT_RING (0x800); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, 0x0a1c, 1); + OUT_RING (chan, 0x800); } - BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); - OUT_RING (8); - BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); - OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (NV20TCL_CULL_FACE_BACK); - OUT_RING (NV20TCL_FRONT_FACE_CCW); - BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); - OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); - BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); - OUT_RING (0); - BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (chan, 8); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (chan, NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, NV20TCL_CULL_FACE_BACK); + OUT_RING (chan, NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(chan, kelvin, 
NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (chan, NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { - OUT_RING(0); + OUT_RING(chan, 0); } - BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); - OUT_RINGf (1.5); - OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ - OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ - BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); - OUT_RING (NV20TCL_FOG_MODE_EXP_2); - OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); - BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); - OUT_RING (0); - OUT_RING (0); /* NV20TCL.FOG_COLOR */ - BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); - OUT_RING (NV20TCL_ENGINE_FIXED); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (chan, 1.5); + OUT_RINGf (chan, -0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (chan, 0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (chan, NV20TCL_FOG_MODE_EXP_SIGNED); + OUT_RING (chan, NV20TCL_FOG_COORD_FOG); + BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (chan, 0); + OUT_RING (chan, 0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1); + OUT_RING (chan, NV20TCL_ENGINE_FIXED); for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { - BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (chan, 0); } - BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); - OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); - OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); 
OUT_RINGf(1.0); + BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); + OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); for (i = 4; i < 16; ++i) { - OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 0.0); + OUT_RINGf(chan, 1.0); } - BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); - OUT_RING (1); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (0x00010101); - BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, 0x00010101); + BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (chan, 0); memset(projectionmatrix, 0, sizeof(projectionmatrix)); projectionmatrix[0*4+0] = 1.0; projectionmatrix[1*4+1] = 1.0; projectionmatrix[2*4+2] = 16777215.0; projectionmatrix[3*4+3] = 1.0; - BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); for (i = 0; i < 16; i++) { - OUT_RINGf (projectionmatrix[i]); + OUT_RINGf (chan, projectionmatrix[i]); } - BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); - OUT_RINGf (0.0); - OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); - OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ - OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ - OUT_RINGf (0.0); - OUT_RINGf (16777215.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */ + OUT_RINGf 
(chan, 0.0); /* y-offset, h/2 + 0.030762 */ + OUT_RINGf (chan, 0.0); + OUT_RINGf (chan, 16777215.0); - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); - OUT_RINGf (0.0); /* no effect?, w/2 */ - OUT_RINGf (0.0); /* no effect?, h/2 */ - OUT_RINGf (16777215.0 * 0.5); - OUT_RINGf (65535.0); + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (chan, 0.0); /* no effect?, w/2 */ + OUT_RINGf (chan, 0.0); /* no effect?, h/2 */ + OUT_RINGf (chan, 16777215.0 * 0.5); + OUT_RINGf (chan, 65535.0); - FIRE_RING (NULL); + FIRE_RING (chan); } struct pipe_context * diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h index a4eaa95660..c7dfadaa31 100644 --- a/src/gallium/drivers/nv20/nv20_context.h +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -15,10 +15,6 @@ #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv20_screen *ctx = nv20->screen -#include "nouveau/nouveau_push.h" - #include "nv20_state.h" #define NOUVEAU_ERR(fmt, args...) 
\ @@ -143,9 +139,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20); extern void nv20_state_tex_update(struct nv20_context *nv20); /* nv20_vbo.c */ -extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv20_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv20_draw_elements( struct pipe_context *pipe, +extern void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count); diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c index 2db4a4015a..dedbec73f3 100644 --- a/src/gallium/drivers/nv20/nv20_fragtex.c +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; struct pipe_texture *pt = &nv20mt->base; struct nv20_texture_format *tf; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t txf, txs, txp; tf = nv20_fragtex_format(pt->format); @@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit) return; } - BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); - OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); - OUT_RING (ps->wrap); - OUT_RING (0x40000000); /* enable */ - OUT_RING (txs); - OUT_RING (ps->filt | 0x2000 /* magic */); - OUT_RING ((pt->width0 << 16) | pt->height0); - OUT_RING (ps->bcol); + BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 
1/*VRAM*/,2/*TT*/); + OUT_RING (chan, ps->wrap); + OUT_RING (chan, 0x40000000); /* enable */ + OUT_RING (chan, txs); + OUT_RING (chan, ps->filt | 0x2000 /* magic */); + OUT_RING (chan, (pt->width0 << 16) | pt->height0); + OUT_RING (chan, ps->bcol); #endif } @@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20) { #if 0 struct nv20_fragment_program *fp = nv20->fragprog.active; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; unsigned samplers, unit; samplers = nv20->fp_samplers & ~fp->samplers; @@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (chan, 0); } samplers = nv20->dirty_samplers & fp->samplers; diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index ddfcdb8057..2e145672da 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render) void nv20_vtxbuf_bind( struct nv20_context* nv20 ) { #if 0 + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int i; for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { - BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); - OUT_RING(0/*nv20->vtxbuf*/); - BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); - OUT_RING(0/*XXX*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(chan, 0/*nv20->vtxbuf*/); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(chan, 0/*XXX*/); } #endif } @@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) static unsigned nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) { 
+ struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; uint32_t hwfmt = 0; unsigned fields; @@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) return 0; } - BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); - OUT_RING(hwfmt); + BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(chan, hwfmt); return fields; } @@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; struct vertex_info *vinfo = &nv20->vertex_info; unsigned nr_fields; int max_push; @@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, nr_fields = nv20__emit_vertex_array_format(nv20); - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); max_push = 1200 / nr_fields; while (nr_indices) { int i; int push = MIN2(nr_indices, max_push); - BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); for (i = 0; i < push; i++) { /* XXX: fixme to handle other than floats? 
*/ int f = nr_fields; float *attrv = (float*)&data[indices[i] * vsz]; while (f-- > 0) - OUT_RINGf(*attrv++); + OUT_RINGf(chan, *attrv++); } nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); - OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); + BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP); } static void @@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, uint nr_indices) { struct nv20_context *nv20 = nv20_render->nv20; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; int push, i; NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); - BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); - OUT_RELOCl(nv20_render->pbuffer, 0, + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING(nv20_render->hwprim); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(chan, nv20_render->hwprim); if (nr_indices & 1) { - BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); - OUT_RING (indices[0]); + BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, indices[0]); indices++; nr_indices--; } @@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, // XXX too big/small ? 
check the size push = MIN2(nr_indices, 1200 * 2); - BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((indices[i+1] << 16) | indices[i]); + OUT_RING(chan, (indices[i+1] << 16) | indices[i]); nr_indices -= push; indices += push; } - BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (chan, 0); } static void diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index a0973f1ebd..d091335063 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -176,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->kelvin, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 63cba1f412..6bbd1fdae9 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -5,27 +5,34 @@ static void nv20_state_emit_blend(struct nv20_context* nv20) { struct nv20_blend_state *b = nv20->blend; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); - OUT_RING (b->d_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (chan, b->d_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); - OUT_RING (b->b_enable); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (chan, b->b_enable); - BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); - OUT_RING (b->b_srcfunc); - OUT_RING (b->b_dstfunc); + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 
2); + OUT_RING (chan, b->b_srcfunc); + OUT_RING (chan, b->b_dstfunc); - BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); - OUT_RING (b->c_mask); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (chan, b->c_mask); } static void nv20_state_emit_blend_color(struct nv20_context* nv20) { struct pipe_blend_color *c = nv20->blend_color; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); - OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING (chan, + (float_to_ubyte(c->color[3]) << 24)| (float_to_ubyte(c->color[0]) << 16)| (float_to_ubyte(c->color[1]) << 8) | (float_to_ubyte(c->color[2]) << 0)); @@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20) static void nv20_state_emit_rast(struct nv20_context* nv20) { struct nv20_rasterizer_state *r = nv20->rast; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); - OUT_RING (r->shade_model); - OUT_RING (r->line_width); + BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (chan, r->shade_model); + OUT_RING (chan, r->line_width); - BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); - OUT_RING (r->point_size); + BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (chan, r->point_size); - BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); - OUT_RING (r->poly_mode_front); - OUT_RING (r->poly_mode_back); + BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (chan, r->poly_mode_front); + OUT_RING (chan, r->poly_mode_back); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); - OUT_RING (r->cull_face); - OUT_RING (r->front_face); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (chan, r->cull_face); + OUT_RING (chan, r->front_face); 
- BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); - OUT_RING (r->line_smooth_en); - OUT_RING (r->poly_smooth_en); + BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (chan, r->line_smooth_en); + OUT_RING (chan, r->poly_smooth_en); - BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); - OUT_RING (r->cull_face_en); + BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (chan, r->cull_face_en); } static void nv20_state_emit_dsa(struct nv20_context* nv20) { struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; - BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); - OUT_RING (d->depth.func); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (chan, d->depth.func); - BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); - OUT_RING (d->depth.write_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (chan, d->depth.write_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); - OUT_RING (d->depth.test_enable); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (chan, d->depth.test_enable); - BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); - OUT_RING (1); + BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (chan, 1); #if 0 - BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); - OUT_RING (d->stencil.enable); - BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); - OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (chan, d->stencil.enable); + BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7); #endif - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); - OUT_RING (d->alpha.enabled); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (chan, d->alpha.enabled); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); - OUT_RING 
(d->alpha.func); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (chan, d->alpha.func); - BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); - OUT_RING (d->alpha.ref); + BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (chan, d->alpha.ref); } static void nv20_state_emit_viewport(struct nv20_context* nv20) @@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20) { /* NV20TCL_SCISSOR_* is probably a software method */ /* struct pipe_scissor_state *s = nv20->scissor; - BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); - OUT_RING (((s->maxx - s->minx) << 16) | s->minx); - OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + + BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (chan, ((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (chan, ((s->maxy - s->miny) << 16) | s->miny);*/ } static void nv20_state_emit_framebuffer(struct nv20_context* nv20) @@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; struct nv20_miptree *nv20mt = 0; + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) } if (zeta) { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (zeta->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (zeta->pitch << 16)); } else { - BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); - OUT_RING (rt->pitch | (rt->pitch << 16)); + BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (chan, rt->pitch | (rt->pitch << 16)); } nv20mt = (struct nv20_miptree *)rt->base.texture; 
@@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) nv20->zeta = nv20mt->buffer; } - BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); - OUT_RING ((w << 16) | 0); - OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */ - OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */ - BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); - OUT_RING (((w - 1) << 16) | 0); - OUT_RING (((h - 1) << 16) | 0); + BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING (chan, (w << 16) | 0); + OUT_RING (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */ + OUT_RING (chan, rt_format); /* NV20TCL_RT_FORMAT */ + BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (chan, ((w - 1) << 16) | 0); + OUT_RING (chan, ((h - 1) << 16) | 0); } static void nv20_vertex_layout(struct nv20_context *nv20) @@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20) void nv20_emit_hw_state(struct nv20_context *nv20) { + struct nv20_screen *screen = nv20->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *kelvin = screen->kelvin; + struct nouveau_bo *rt_bo; int i; if (nv20->dirty & NV20_NEW_VERTPROG) { @@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20) */ /* Render target */ - BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + rt_bo = nouveau_bo(nv20->rt[0]); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (nv20->zeta) { - BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); - OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); - OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + struct nouveau_bo *zeta_bo = 
nouveau_bo(nv20->zeta); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1); + OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* XXX for when we allocate LMA on nv17 */ -/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); - OUT_RELOCl(nv20->zeta + lma_offset);*/ +/* BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/ } /* Vertex buffer */ - BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); - OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); - OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* Texture images */ for (i = 0; i < 2; i++) { if (!(nv20->fp_samplers & (1 << i))) continue; - BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); - OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer); + BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); - OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(chan, bo, nv20->tex[i].format, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, NV20TCL_TX_FORMAT_DMA1); diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 4bf461eba9..52991a0d85 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -9,7 +9,7 @@ #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" -boolean 
nv20_draw_elements( struct pipe_context *pipe, +void nv20_draw_elements( struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned prim, unsigned start, unsigned count) @@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, } draw_flush(nv20->draw); - return TRUE; } -boolean nv20_draw_arrays( struct pipe_context *pipe, +void nv20_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return nv20_draw_elements(pipe, NULL, 0, prim, start, count); + nv20_draw_elements(pipe, NULL, 0, prim, start, count); } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 38b39159f1..54572e9ab3 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -10,15 +10,20 @@ nv30_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(rankine, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, rankine, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 864ddaeb59..e59449287b 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv30_screen *ctx = nv30->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include 
"nv30_state.h" @@ -198,9 +194,9 @@ extern struct nv30_state_entry nv30_state_fragtex; extern struct nv30_state_entry nv30_state_vbo; /* nv30_vbo.c */ -extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv30_draw_elements(struct pipe_context *pipe, +extern void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index d1ff18e2df..2d565cb631 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -837,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); - so = so_new(8, 1); + so = so_new(4, 4, 1); so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index b3293ee700..9893567891 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(1, 8, 2); so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); diff 
--git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index 1d1c8a484e..e27e9ccbf6 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_query *q = nv30_query(pq); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) assert(0); nouveau_notifier_reset(nv30->screen->query, q->object->start); - BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -69,12 +72,15 @@ static void nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_query *q = nv30_query(pq); - BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 760467f736..9ed48178dc 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -233,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct 
nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->rankine, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -270,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static rankine initialisation */ - so = so_new(128, 0); + so = so_new(36, 60, 0); so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index e6321b480f..a80dfb0488 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe, struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_grobj *rankine = nv30->screen->rankine; struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); @@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(9, 19, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; /*XXX: ignored: @@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(5, 21, 0); struct nouveau_grobj *rankine = nv30->screen->rankine; so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c 
index 64cf9ae93a..c36d58c040 100644 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = { static boolean nv30_state_blend_colour_validate(struct nv30_context *nv30) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv30->blend_colour; so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 6f6d1740d6..2ed2ea55e8 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) struct nv04_surface *rt[2], *zeta = NULL; uint32_t rt_enable = 0, rt_format = 0; int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(12, 18, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c index 3ac7a8471e..ba61a9e24a 100644 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30) return FALSE; nv30->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); if (nv30->state.scissor_enabled) { so_data (so, ((s->maxx - s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c index d0c791ac08..ed520a4f43 100644 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -14,14 +14,14 @@ 
nv30_state_stipple_validate(struct nv30_context *nv30) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv30->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c index c3eb413dac..2d7781292b 100644 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30) return FALSE; nv30->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(3, 10, 0); if (!bypass) { so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index e32b8141af..1c5db03ea2 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -163,19 +163,21 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv30_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; nv30_vbo_set_idxbuf(nv30, NULL, 0); if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } while (count) { @@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe, vc = 
nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; @@ -228,7 +230,9 @@ static INLINE void nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING 
(elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | 
elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void nv30_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv30_context *nv30 = nv30_context(pipe); - struct 
nouveau_channel *chan = nv30->screen->base.channel; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; unsigned restart = 0; while (count) { @@ -412,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv30_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -461,7 +468,7 @@ nv30_draw_elements(struct pipe_context *pipe, if (FORCE_SWTNL || !nv30_state_validate(nv30)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ - return FALSE; + return; } if (idxbuf) { @@ -472,7 +479,6 @@ nv30_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -485,9 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30) unsigned 
vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); for (hw = 0; hw < nv30->vtxelt_nr; hw++) { @@ -500,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 5d60984622..e77a5be3f2 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -650,7 +650,9 @@ static boolean nv30_vertprog_validate(struct nv30_context *nv30) { struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_screen *screen = nv30->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *rankine = screen->rankine; struct nv30_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -684,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) assert(0); } - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_ref(so, &vp->so); @@ -770,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30) 4 * sizeof(float)); } - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -788,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30) vp->insns[i].data[2], 
vp->insns[i].data[3]); } #endif - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index d56c7a6b49..f79ae4db84 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -10,15 +10,20 @@ nv40_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) { struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (2); - BEGIN_RING(curie, 0x1fd8, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, 0x1fd8, 1); + OUT_RING (chan, 1); } - FIRE_RING(fence); + FIRE_RING(chan); + if (fence) + *fence = NULL; } static void diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 83fcf1785d..e219bb537a 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -14,10 +14,6 @@ #include "nouveau/nouveau_winsys.h" #include "nouveau/nouveau_gldefs.h" #include "nouveau/nouveau_context.h" - -#define NOUVEAU_PUSH_CONTEXT(ctx) \ - struct nv40_screen *ctx = nv40->screen -#include "nouveau/nouveau_push.h" #include "nouveau/nouveau_stateobj.h" #include "nv40_state.h" @@ -183,7 +179,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); /* nv40_draw.c */ extern struct draw_stage 
*nv40_draw_render_stage(struct nv40_context *nv40); -extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned ib_size, unsigned mode, unsigned start, unsigned count); @@ -219,9 +215,9 @@ extern struct nv40_state_entry nv40_state_vbo; extern struct nv40_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ -extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv40_draw_elements(struct pipe_context *pipe, +extern void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 3875bc3545..d826f8c2f5 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage) static INLINE void nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) { + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; for (i = 0; i < nv40->swtnl.nr_attribs; i++) { @@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) case EMIT_OMIT: break; case EMIT_1F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (fui(v->data[idx][0])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); break; case EMIT_2F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); break; case EMIT_3F: - 
BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); break; case EMIT_4F: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (fui(v->data[idx][0])); - OUT_RING (fui(v->data[idx][1])); - OUT_RING (fui(v->data[idx][2])); - OUT_RING (fui(v->data[idx][3])); + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); break; case EMIT_4UB: - BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), float_to_ubyte(v->data[idx][1]), float_to_ubyte(v->data[idx][2]), float_to_ubyte(v->data[idx][3]))); @@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, { struct nv40_render_stage *rs = nv40_render_stage(stage); struct nv40_context *nv40 = rs->nv40; - struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf; + + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_pushbuf *pb = chan->pushbuf; + struct nouveau_grobj *curie = screen->curie; unsigned i; /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ @@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, NOUVEAU_ERR("AIII, missed flush\n"); assert(0); } - FIRE_RING(NULL); + FIRE_RING(chan); nv40_state_emit(nv40); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING 
(NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (mode); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, mode); rs->prim = mode; } @@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, * off the primitive now. */ if (pb->remaining < ((count * 20) + 6)) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags) { struct nv40_render_stage *rs = nv40_render_stage(draw); struct nv40_context *nv40 = rs->nv40; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (NV40TCL_BEGIN_END_STOP); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, NV40TCL_BEGIN_END_STOP); rs->prim = NV40TCL_BEGIN_END_STOP; } } @@ -226,7 +236,7 @@ nv40_draw_render_stage(struct nv40_context *nv40) return &render->stage; } -boolean +void nv40_draw_elements_swtnl(struct pipe_context *pipe, struct pipe_buffer *idxbuf, unsigned idxbuf_size, unsigned mode, unsigned start, unsigned count) @@ -237,7 +247,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, void *map; if (!nv40_state_validate_swtnl(nv40)) - return FALSE; + return; nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); @@ -278,8 +288,6 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); - - return TRUE; } static INLINE void diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index bb9c85cc43..1237066c39 100644 
--- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -919,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); - so = so_new(4, 1); + so = so_new(2, 2, 1); so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 44abc84596..aad9198210 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; - so = so_new(16, 2); + so = so_new(2, 9, 2); so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, @@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40) unit = ffs(samplers) - 1; samplers &= ~(1 << unit); - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); so_data (so, 0); so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 7874aedd42..8ed4a67dd0 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); @@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) 
assert(0); nouveau_notifier_reset(nv40->screen->query, q->object->start); - BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (1); - BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); q->ready = FALSE; } @@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; - BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); - OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(NULL); + FIRE_RING(chan); } static boolean diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index d01e712805..9e55e5a089 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -215,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } - BIND_RING(chan, screen->curie, 7); /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); @@ -252,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static curie initialisation */ - so = so_new(128, 0); + so = so_new(16, 25, 0); so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index ed55d29aff..ed0ca9e02c 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ 
b/src/gallium/drivers/nv40/nv40_state.c @@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(16, 0); + struct nouveau_stateobj *so = so_new(5, 8, 0); if (cso->blend_enable) { so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); @@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(8, 18, 0); struct nouveau_grobj *curie = nv40->screen->curie; /*XXX: ignored: @@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_stateobj *so = so_new(4, 21, 0); struct nouveau_grobj *curie = nv40->screen->curie; so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c index 8cd05ce66e..3ff00a37f6 100644 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = { static boolean nv40_state_blend_colour_validate(struct nv40_context *nv40) { - struct nouveau_stateobj *so = so_new(2, 0); + struct nouveau_stateobj *so = so_new(1, 1, 0); struct pipe_blend_color *bcol = &nv40->blend_colour; so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 789ed16126..13fe854915 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ 
-54,9 +54,10 @@ nv40_state_do_validate(struct nv40_context *nv40, void nv40_state_emit(struct nv40_context *nv40) { - struct nouveau_channel *chan = nv40->screen->base.channel; struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned i; uint64_t states; @@ -80,10 +81,10 @@ nv40_state_emit(struct nv40_context *nv40) if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (2); - BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (1); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); } state->dirty = 0; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 1c7a7cd64f..a58fe9ddb1 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) struct nv04_surface *rt[4], *zeta; uint32_t rt_enable, rt_format; int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(64, 10); + struct nouveau_stateobj *so = so_new(18, 24, 10); unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c index cf58d33906..753a505e93 100644 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40) return FALSE; nv40->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); if (nv40->state.scissor_enabled) { so_data (so, ((s->maxx - 
s->minx) << 16) | s->minx); diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c index b51024ad9b..2b371ebfec 100644 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40) if (rast->poly_stipple_enable) { unsigned i; - so = so_new(35, 0); + so = so_new(2, 33, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 1); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, nv40->stipple[i]); } else { - so = so_new(2, 0); + so = so_new(1, 1, 0); so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c index 665d2d5fca..9919ba1d0b 100644 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40) return FALSE; nv40->state.viewport_bypass = bypass; - so = so_new(11, 0); + so = so_new(2, 9, 0); if (!bypass) { so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8); diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index af3fcf6a34..a777898f68 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -164,18 +164,21 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, return TRUE; } -boolean +void nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; 
nv40_vbo_set_idxbuf(nv40, NULL, 0); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { @@ -186,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -206,29 +209,30 @@ nv40_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } pipe->flush(pipe, 0, NULL); - return TRUE; } static INLINE void nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint8_t *elts = (uint8_t *)ib + start; @@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + 
FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -277,7 +281,9 @@ static INLINE void nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint16_t *elts = (uint16_t *)ib + start; @@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (elts[0]); + BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); elts++; vc--; } @@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, 
void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) - OUT_RING((elts[i+1] << 16) | elts[i]); + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } @@ -326,7 +332,9 @@ static INLINE void nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; while (count) { uint32_t *elts = (uint32_t *)ib + start; @@ -337,32 +345,32 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, mode, start, count, &restart); if (vc == 0) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } count -= vc; - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (elts, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); start = restart; } } -static boolean +static void nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) @@ -393,15 +401,16 @@ nv40_draw_elements_inline(struct pipe_context *pipe, } pipe_buffer_unmap(pscreen, ib); - return TRUE; } -static boolean +static void 
nv40_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_channel *chan = nv40->screen->base.channel; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; unsigned restart; while (count) { @@ -412,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, mode, start, count, &restart); if (!vc) { - FIRE_RING(NULL); + FIRE_RING(chan); continue; } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (nvgl_primitive(mode)); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (((nr - 1) << 24) | start); + BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -432,24 +441,22 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); while (push--) { - OUT_RING(((0x100 - 1) << 24) | start); + OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); - OUT_RING (0); + BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); + OUT_RING (chan, 0); count -= vc; start = restart; } - - return TRUE; } -boolean +void nv40_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -459,8 +466,9 @@ nv40_draw_elements(struct pipe_context *pipe, idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - return nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); + nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } if 
(idxbuf) { @@ -471,7 +479,6 @@ nv40_draw_elements(struct pipe_context *pipe, } pipe->flush(pipe, 0, NULL); - return TRUE; } static boolean @@ -484,9 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40) unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; - vtxbuf = so_new(20, 18); + vtxbuf = so_new(3, 17, 18); so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); - vtxfmt = so_new(17, 0); + vtxfmt = so_new(1, 16, 0); so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); for (hw = 0; hw < nv40->vtxelt_nr; hw++) { @@ -499,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40) if (!vb->stride) { if (!sattr) - sattr = so_new(16 * 5, 0); + sattr = so_new(16, 16 * 4, 0); if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index d9fc31006f..8d80fcad38 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -834,7 +834,9 @@ static boolean nv40_vertprog_validate(struct nv40_context *nv40) { struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_screen *screen = nv40->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *curie = screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -884,7 +886,7 @@ check_gpu_resources: assert(0); } - so = so_new(7, 0); + so = so_new(3, 4, 0); so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); @@ -974,9 +976,9 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (i + vp->data->start); - OUT_RINGp ((uint32_t *)vpd->value, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + 
OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) @@ -993,11 +995,11 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (vp->exec->start); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (vp->insns[i].data, 4); + BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); } } diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5578a5838f..cbd4c3ff86 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); /* nv50_vbo.c */ -extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, +extern void nv50_draw_arrays(struct pipe_context *, unsigned mode, unsigned start, unsigned count); -extern boolean nv50_draw_elements(struct pipe_context *pipe, +extern void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2d0b1818ef..e16fa479e5 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -96,7 +96,11 @@ struct nv50_reg { #define NV50_MOD_NEG 1 #define NV50_MOD_ABS 2 +#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS) #define NV50_MOD_SAT 4 +#define NV50_MOD_I32 8 + +/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */ /* STACK: Conditionals and loops have to use the (per warp) stack. 
* Stack entries consist of an entry type (divergent path, join at), @@ -134,6 +138,7 @@ struct nv50_pc { uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ struct nv50_reg *temp_temp[16]; + struct nv50_program_exec *temp_temp_exec[16]; unsigned temp_temp_nr; /* broadcast and destination replacement regs */ @@ -241,7 +246,8 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); } static INLINE struct nv50_reg * @@ -281,7 +287,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); return NULL; } @@ -343,23 +350,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) } static struct nv50_reg * -temp_temp(struct nv50_pc *pc) +temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + pc->temp_temp_exec[pc->temp_temp_nr] = e; return pc->temp_temp[pc->temp_temp_nr++]; } +/* This *must* be called for all nv50_program_exec that have been + * given as argument to temp_temp, or the temps will be leaked ! + */ static void -kill_temp_temp(struct nv50_pc *pc) +kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { int i; for (i = 0; i < pc->temp_temp_nr; i++) - free_temp(pc, pc->temp_temp[i]); - pc->temp_temp_nr = 0; + if (pc->temp_temp_exec[i] == e) + free_temp(pc, pc->temp_temp[i]); + if (!e) + pc->temp_temp_nr = 0; } static int @@ -421,6 +434,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 
2 : 1; + + kill_temp_temp(pc, e); } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -776,7 +791,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_reg *temp; if (src->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } @@ -795,7 +810,7 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) e->inst[1] |= 0x00200000; } else if (src->type == P_CONST || src->type == P_IMMD) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -811,7 +826,7 @@ static void set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -819,7 +834,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x00800000)); if (e->inst[0] & 0x01000000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -841,7 +856,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) set_long(pc, e); if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -849,7 +864,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_CONST || src->type == P_IMMD) { assert(!(e->inst[0] & 0x01000000)); if (e->inst[0] & 0x00800000) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -864,6 +879,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } static void +set_half_src(struct nv50_pc 
*pc, struct nv50_reg *src, int lh, + struct nv50_program_exec *e, int pos) +{ + struct nv50_reg *r = src; + + alloc_reg(pc, r); + if (r->type != P_TEMP) { + r = temp_temp(pc, e); + emit_mov(pc, r, src); + } + + if (r->hw > (NV50_SU_MAX_TEMP / 2)) { + NOUVEAU_ERR("out of low GPRs\n"); + abort(); + } + + e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32); +} + +static void emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) { struct nv50_program_exec *e = exec(pc); @@ -967,6 +1002,13 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, emit(pc, e); } +#define NV50_MAX_F32 0x880 +#define NV50_MAX_S32 0x08c +#define NV50_MAX_U32 0x084 +#define NV50_MIN_F32 0x8a0 +#define NV50_MIN_S32 0x0ac +#define NV50_MIN_U32 0x0a4 + static void emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1) @@ -974,8 +1016,8 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_program_exec *e = exec(pc); set_long(pc, e); - e->inst[0] |= 0xb0000000; - e->inst[1] |= (sub << 29); + e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20); + e->inst[1] |= (sub << 24); check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); @@ -1039,6 +1081,69 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void +emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + e->inst[1] = 0x0402c000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_1(pc, src, e); + + emit(pc, e); +} + +static void +emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4000000; + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (src1->type == P_IMMD) { + e->inst[1] |= (1 << 20); + e->inst[0] |= 
(pc->immd_buf[src1->hw] & 0x7f) << 16; + } else + set_src_1(pc, src1, e); + + if (dir != TGSI_OPCODE_SHL) + e->inst[1] |= (1 << 29); + + if (dir == TGSI_OPCODE_ISHR) + e->inst[1] |= (1 << 27); + + emit(pc, e); +} + +static void +emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, int s) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4100000; + if (s < 0) { + e->inst[1] |= 1 << 29; + s = -s; + } + e->inst[1] |= ((s & 0x7f) << 16); + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { @@ -1142,36 +1247,41 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) emit(pc, e); } -#define CVTOP_RN 0x01 -#define CVTOP_FLOOR 0x03 -#define CVTOP_CEIL 0x05 -#define CVTOP_TRUNC 0x07 -#define CVTOP_SAT 0x08 -#define CVTOP_ABS 0x10 - -/* 0x04 == 32 bit dst */ -/* 0x40 == dst is float */ -/* 0x80 == src is float */ -#define CVT_F32_F32 0xc4 -#define CVT_F32_S32 0x44 -#define CVT_S32_F32 0x8c -#define CVT_S32_S32 0x0c -#define CVT_NEG 0x20 -#define CVT_RI 0x08 +#define CVT_RN (0x00 << 16) +#define CVT_FLOOR (0x02 << 16) +#define CVT_CEIL (0x04 << 16) +#define CVT_TRUNC (0x06 << 16) +#define CVT_SAT (0x08 << 16) +#define CVT_ABS (0x10 << 16) + +#define CVT_X32_X32 0x04004000 +#define CVT_X32_S32 0x04014000 +#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32) +#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32) +#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32) +#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32) +#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32) +#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32) +#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32) +#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32) + +#define CVT_NEG 0x20000000 +#define CVT_RI 0x08000000 static void emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, 
struct nv50_reg *src, - int wp, unsigned cvn, unsigned fmt) + int wp, uint32_t cvn) { struct nv50_program_exec *e; e = exec(pc); - set_long(pc, e); - e->inst[0] |= 0xa0000000; - e->inst[1] |= 0x00004000; /* 32 bit src */ - e->inst[1] |= (cvn << 16); - e->inst[1] |= (fmt << 24); + if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG; + if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS; + + e->inst[0] = 0xa0000000; + e->inst[1] = cvn; + set_long(pc, e); set_src_0(pc, src, e); if (wp >= 0) @@ -1196,10 +1306,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, * 0x6 = GE * 0x7 = set condition code ? (used before bra.lt/le/gt/ge) * 0x8 = unordered bit (allows NaN) + * + * mode = 0x04 (u32), 0x0c (s32), 0x80 (f32) */ static void emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, - struct nv50_reg *src0, struct nv50_reg *src1) + struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode) { static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; @@ -1214,16 +1326,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, if (dst && dst->type != P_TEMP) dst = alloc_temp(pc, NULL); - /* set.u32 */ set_long(pc, e); - e->inst[0] |= 0xb0000000; + e->inst[0] |= 0x30000000 | (mode << 24); e->inst[1] |= 0x60000000 | (ccode << 14); - /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but - * that doesn't seem to match what the hw actually does - e->inst[1] |= 0x04000000; << breaks things, u32 by default ? - */ - if (wp >= 0) set_pred_wr(pc, 1, wp, e); if (dst) @@ -1238,33 +1344,146 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, emit(pc, e); - /* cvt.f32.u32/s32 (?) if we didn't only write the predicate */ - if (rdst) - emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); + if (rdst && mode == 0x80) /* convert to float ? 
*/ + emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32); if (rdst && rdst != dst) free_temp(pc, dst); } -static INLINE unsigned -map_tgsi_setop_cc(unsigned op) +static INLINE void +map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty) { switch (op) { - case TGSI_OPCODE_SLT: return 0x1; - case TGSI_OPCODE_SGE: return 0x6; - case TGSI_OPCODE_SEQ: return 0x2; - case TGSI_OPCODE_SGT: return 0x4; - case TGSI_OPCODE_SLE: return 0x3; - case TGSI_OPCODE_SNE: return 0xd; + case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break; + case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break; + case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break; + case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break; + case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break; + case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break; + + case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break; + case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break; + case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break; + case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break; + case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break; + case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break; default: assert(0); - return 0; + return; + } +} + +static void +emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *rsrc1) +{ + struct nv50_program_exec *e = exec(pc); + struct nv50_reg *src1; + + e->inst[0] = 0x20000000; + + alloc_reg(pc, rsrc1); + check_swap_src_0_1(pc, &src0, &rsrc1); + + src1 = rsrc1; + if (src0->mod & rsrc1->mod & NV50_MOD_NEG) { + src1 = temp_temp(pc, e); + emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32); + } + + if (!pc->allow32 || src1->hw > 63 || + (src1->type != P_TEMP && src1->type != P_IMMD)) + set_long(pc, e); + + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + + if (is_long(e)) { + e->inst[1] |= 1 << 26; + set_src_2(pc, src1, e); + } else { + e->inst[0] |= 0x8000; + if (src1->type == P_IMMD) + set_immd(pc, src1, e); + else + set_src_1(pc, src1, e); } + + if (src0->mod & NV50_MOD_NEG) + 
e->inst[0] |= 1 << 28; + else + if (src1->mod & NV50_MOD_NEG) + e->inst[0] |= 1 << 22; + + emit(pc, e); +} + +static void +emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1, + struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x60000000; + if (!pc->allow32) + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + + emit(pc, e); +} + +static void +emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x50000000; + if (!pc->allow32) + set_long(pc, e); + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + if (is_long(e)) + e->inst[1] |= 0x0c << 24; + else + e->inst[0] |= 0x81 << 8; + + emit(pc, e); } static INLINE void emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); + emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI); } static void @@ -1282,15 +1501,9 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, } static INLINE void -emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, 
-1, CVTOP_ABS, CVT_F32_F32); -} - -static INLINE void emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { - emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); + emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32); } static void @@ -1308,18 +1521,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (mask & (3 << 1)) { tmp[0] = alloc_temp(pc, NULL); - emit_minmax(pc, 4, tmp[0], src[0], zero); + emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero); } if (mask & (1 << 2)) { set_pred_wr(pc, 1, 0, pc->p->exec_tail); - tmp[1] = temp_temp(pc); - emit_minmax(pc, 4, tmp[1], src[1], zero); + tmp[1] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); - emit_minmax(pc, 4, tmp[3], src[3], neg128); - emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + tmp[3] = temp_temp(pc, NULL); + emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128); + emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128); emit_pow(pc, dst[2], tmp[1], tmp[3]); emit_mov(pc, dst[2], zero); @@ -1347,12 +1560,6 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, FREE(one); } -static INLINE void -emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); -} - static void emit_kil(struct nv50_pc *pc, struct nv50_reg *src) { @@ -1364,14 +1571,9 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) set_long(pc, e); /* sets cond code to ALWAYS */ if (src) { - unsigned cvn = CVT_F32_F32; - set_pred(pc, 0x1 /* cc = LT */, r_pred, e); - - if (src->mod & NV50_MOD_NEG) - cvn |= CVT_NEG; - /* write predicate reg */ - emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); + /* write to predicate reg */ + emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32); } emit(pc, e); @@ -1474,8 +1676,8 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], src[1]->mod |= NV50_MOD_ABS; src[2]->mod |= NV50_MOD_ABS; - emit_minmax(pc, 4, t[2], src[0], src[1]); 
- emit_minmax(pc, 4, t[2], src[2], t[2]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]); + emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]); src[0]->mod = mod[0]; src[1]->mod = mod[1]; @@ -1778,6 +1980,21 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) q = 0x0403c000; m = 0xffff7fff; break; + case 0x2: + case 0x3: + /* ADD, SUB, SUBR b32 */ + m = ~(0x8000 | (127 << 16)); + q = ((e->inst[0] & (~m)) >> 2) | (1 << 26); + break; + case 0x5: + /* SAD */ + m = ~(0x81 << 8); + q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12); + break; + case 0x6: + /* MAD u16 */ + q = (e->inst[0] & (0x7f << 2)) << 12; + break; case 0x8: /* INTERP (move centroid, perspective and flat bits) */ m = ~0x03000100; @@ -1814,8 +2031,8 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) } /* Some operations support an optional negation flag. */ -static boolean -negate_supported(const struct tgsi_full_instruction *insn, int i) +static int +get_supported_mods(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { case TGSI_OPCODE_ADD: @@ -1835,9 +2052,36 @@ negate_supported(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_SCS: case TGSI_OPCODE_SIN: case TGSI_OPCODE_SUB: - return TRUE; + return NV50_MOD_NEG; + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */ + return NV50_MOD_ABS; + case TGSI_OPCODE_CEIL: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_TRUNC: + return NV50_MOD_NEG | NV50_MOD_ABS; + case TGSI_OPCODE_F2I: + case TGSI_OPCODE_F2U: + case TGSI_OPCODE_I2F: + case TGSI_OPCODE_U2F: + return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32; + case TGSI_OPCODE_UADD: + return NV50_MOD_NEG | NV50_MOD_I32; + case TGSI_OPCODE_SAD: + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_IMAX: + case TGSI_OPCODE_IMIN: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_UMAD: + case TGSI_OPCODE_UMAX: + case TGSI_OPCODE_UMIN: + case 
TGSI_OPCODE_UMUL: + case TGSI_OPCODE_USHR: + return NV50_MOD_I32; default: - return FALSE; + return 0; } } @@ -1944,11 +2188,11 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) static struct nv50_reg * tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, - boolean neg) + int mod) { struct nv50_reg *r = NULL; - struct nv50_reg *temp; - unsigned sgn, c, swz; + struct nv50_reg *temp = NULL; + unsigned sgn, c, swz, cvn; if (src->Register.File != TGSI_FILE_CONSTANT) assert(!src->Register.Indirect); @@ -1988,7 +2232,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = &pc->immd[src->Register.Index * 4 + c]; break; case TGSI_FILE_SAMPLER: - break; + return NULL; case TGSI_FILE_ADDRESS: r = pc->addr[src->Register.Index * 4 + c]; assert(r); @@ -2003,35 +2247,34 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, break; } + cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32; + switch (sgn) { - case TGSI_UTIL_SIGN_KEEP: - break; case TGSI_UTIL_SIGN_CLEAR: - temp = temp_temp(pc); - emit_abs(pc, temp, r); - r = temp; - break; - case TGSI_UTIL_SIGN_TOGGLE: - if (neg) - r->mod = NV50_MOD_NEG; - else { - temp = temp_temp(pc); - emit_neg(pc, temp, r); - r = temp; - } + r->mod = NV50_MOD_ABS; break; case TGSI_UTIL_SIGN_SET: - temp = temp_temp(pc); - emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG); - r = temp; + r->mod = NV50_MOD_NEG_ABS; + break; + case TGSI_UTIL_SIGN_TOGGLE: + r->mod = NV50_MOD_NEG; break; default: - assert(0); + assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP); break; } - if (r && r->acc >= 0 && r != temp) - return reg_instance(pc, r); + if ((r->mod & mod) != r->mod) { + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, r, -1, cvn); + r->mod = 0; + r = temp; + } else + r->mod |= mod & NV50_MOD_I32; + + assert(r); + if (r->acc >= 0 && r != temp) + return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2195,22 
+2438,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fs = &inst->Src[i]; unsigned src_mask; - boolean neg_supp; + int mod_supp; src_mask = nv50_tgsi_src_mask(inst, i); - neg_supp = negate_supported(inst, i); + mod_supp = get_supported_mods(inst, i); if (fs->Register.File == TGSI_FILE_SAMPLER) unit = fs->Register.Index; for (c = 0; c < 4; c++) if (src_mask & (1 << c)) - src[i][c] = tgsi_src(pc, c, fs, neg_supp); + src[i][c] = tgsi_src(pc, c, fs, mod_supp); } brdc = temp = pc->r_brdc; if (brdc && brdc->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (sat) brdc = temp; } else @@ -2219,7 +2462,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; /* rdst[c] = dst[c]; */ /* done above */ - dst[c] = temp_temp(pc); + dst[c] = temp_temp(pc, NULL); } } @@ -2230,7 +2473,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_abs(pc, dst[c], src[0][c]); + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_ABS | CVT_F32_F32); } break; case TGSI_OPCODE_ADD: @@ -2252,8 +2496,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, break; case TGSI_OPCODE_ARL: assert(src[0][0]); - temp = temp_temp(pc); - emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); + temp = temp_temp(pc, NULL); + emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); emit_arl(pc, dst[0], temp, 4); break; case TGSI_OPCODE_BGNLOOP: @@ -2282,7 +2526,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_CEIL, CVT_F32_F32 | CVT_RI); + CVT_CEIL | CVT_F32_F32 | CVT_RI); } break; case TGSI_OPCODE_CMP: @@ -2290,7 +2534,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); + emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32); 
emit_mov(pc, dst[c], src[1][c]); set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ emit_mov(pc, dst[c], src[2][c]); @@ -2309,7 +2553,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_COS, brdc, temp); @@ -2397,8 +2641,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, struct nv50_reg *t[2]; assert(!temp); - t[0] = temp_temp(pc); - t[1] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); + t[1] = temp_temp(pc, NULL); if (mask & 0x6) emit_mov(pc, t[0], src[0][0]); @@ -2419,6 +2663,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0f); } break; + case TGSI_OPCODE_F2I: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_S32_F32); + } + break; + case TGSI_OPCODE_F2U: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_TRUNC | CVT_U32_F32); + } + break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -2427,7 +2687,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_FRC: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2435,14 +2695,42 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_sub(pc, dst[c], src[0][c], temp); } break; + case TGSI_OPCODE_I2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32); + } + break; case TGSI_OPCODE_IF: assert(pc->if_lvl < NV50_MAX_COND_NESTING); - emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN, - CVT_F32_F32); + emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32); pc->if_join[pc->if_lvl] = emit_joinat(pc); pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);; terminate_mbb(pc); break; + case TGSI_OPCODE_IMAX: + for (c = 0; c < 4; c++) { + if 
(!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_IMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_INEG: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, + CVT_S32_S32 | CVT_NEG); + } + break; case TGSI_OPCODE_KIL: assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]); emit_kil(pc, src[0][0]); @@ -2463,13 +2751,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, { struct nv50_reg *t[2]; - t[0] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); if (mask & (1 << 1)) - t[1] = temp_temp(pc); + t[1] = temp_temp(pc, NULL); else t[1] = t[0]; - emit_abs(pc, t[0], src[0][0]); + emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32); emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); if (mask & (1 << 2)) emit_mov(pc, dst[2], t[1]); @@ -2488,7 +2776,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_LRP: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2507,14 +2795,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MIN: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]); } break; case TGSI_OPCODE_MOV: @@ -2531,10 +2819,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_NOT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_not(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_POW: emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: + if (!sat && 
popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: @@ -2543,11 +2840,20 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; src[0][0]->mod |= NV50_MOD_ABS; emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; + case TGSI_OPCODE_SAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } + break; case TGSI_OPCODE_SCS: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) @@ -2559,6 +2865,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (mask & (1 << 3)) emit_mov_immdval(pc, dst[3], 1.0); break; + case TGSI_OPCODE_SHL: + case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_USHR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_shift(pc, dst[c], src[0][c], src[1][c], + inst->Instruction.Opcode); + } + break; case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); @@ -2566,7 +2882,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_SIN, brdc, temp); @@ -2577,12 +2893,23 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SGT: case TGSI_OPCODE_SLE: case TGSI_OPCODE_SNE: - i = map_tgsi_setop_cc(inst->Instruction.Opcode); + case TGSI_OPCODE_ISLT: + case TGSI_OPCODE_ISGE: + case TGSI_OPCODE_USEQ: + case TGSI_OPCODE_USGE: + case TGSI_OPCODE_USLT: + case TGSI_OPCODE_USNE: + { + uint8_t cc, ty; + + map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty); + for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; - emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); + emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty); } + } break; case 
TGSI_OPCODE_SUB: for (c = 0; c < 4; c++) { @@ -2612,11 +2939,72 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c))) continue; emit_cvt(pc, dst[c], src[0][c], -1, - CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); + CVT_TRUNC | CVT_F32_F32 | CVT_RI); + } + break; + case TGSI_OPCODE_U2F: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32); + } + break; + case TGSI_OPCODE_UADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add_b32(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_UMIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_UMAD: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0, + temp); + emit_add_b32(pc, dst[c], temp, src[2][c]); + } + } + break; + case TGSI_OPCODE_UMUL: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0, + temp); + } + } + break; case TGSI_OPCODE_XPD: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -2670,7 +3058,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - kill_temp_temp(pc); + kill_temp_temp(pc, NULL); 
pc->reg_instance_nr = 0; return TRUE; @@ -2679,7 +3067,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, static void prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) { - struct nv50_reg *reg = NULL; + struct nv50_reg *r, *reg = NULL; const struct tgsi_full_src_register *src; const struct tgsi_dst_register *dst; unsigned i, c, k, mask; @@ -2725,7 +3113,15 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) continue; k = tgsi_util_get_full_src_register_swizzle(src, c); - reg[src->Register.Index * 4 + k].acc = pc->insn_nr; + r = ®[src->Register.Index * 4 + k]; + + /* If used before written, pre-allocate the reg, + * lest we overwrite results from a subroutine. + */ + if (!r->acc && r->type == P_TEMP) + alloc_reg(pc, r); + + r->acc = pc->insn_nr; } } } @@ -2814,7 +3210,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { unsigned chn, mask = nv50_tgsi_src_mask(insn, i); - boolean neg_supp = negate_supported(insn, i); + int ms = get_supported_mods(insn, i); fs = &insn->Src[i]; if (fs->Register.File != fd->Register.File || @@ -2832,10 +3228,12 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, if (!(fd->Register.WriteMask & (1 << c))) continue; - /* no danger if src is copied to TEMP first */ - if ((s != TGSI_UTIL_SIGN_KEEP) && - (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) - continue; + if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG)) + continue; + if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS)) + continue; + if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3)) + continue; rdep[c] |= nv50_tgsi_dst_revdep( insn->Instruction.Opcode, i, chn); @@ -2859,7 +3257,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (is_scalar_op(insn.Instruction.Opcode)) { pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); if (!pc->r_brdc) - pc->r_brdc = temp_temp(pc); + pc->r_brdc = temp_temp(pc, NULL); return 
nv50_program_tx_insn(pc, &insn); } pc->r_brdc = NULL; @@ -3579,7 +3977,7 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(13, 2); + so = so_new(5, 8, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3615,7 +4013,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(64, 2); + so = so_new(6, 7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -3635,12 +4033,13 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } -static void +static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { struct nv50_program *fp = nv50->fragprog; struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; + uint32_t origin = 0x00000010; /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in @@ -3674,7 +4073,9 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) if (mode == PIPE_SPRITE_COORD_NONE) { m += n; continue; - } + } else + if (mode == PIPE_SPRITE_COORD_LOWER_LEFT) + origin = 0; } /* this is either PointCoord or replaced by sprite coords */ @@ -3685,6 +4086,7 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) ++m; } } + return origin; } static int @@ -3783,7 +4185,7 @@ nv50_linkage_validate(struct nv50_context *nv50) } /* now fill the stateobj */ - so = so_new(64, 0); + so = so_new(7, 57, 0); n = (m + 3) / 4; so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); @@ -3801,7 +4203,9 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { - nv50_pntc_replace(nv50, pcrd, (reg[4] >> 
8) & 0xff); + so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); + so_data (so, + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff)); so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); so_datap (so, pcrd, 8); diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5d9e18218a..5a4ab3508b 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | - wait ? 0 : NOUVEAU_BO_NOWAIT); + (wait ? 0 : NOUVEAU_BO_NOWAIT)); if (ret) return false; q->result = ((uint32_t *)q->bo->map)[1]; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7e039ea82e..28e2b35dea 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -189,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen) FREE(screen); } +static int +nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, + unsigned usage) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *ctx = screen->cur_ctx; + + if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX)) + return 0; + + /* Our vtxbuf got mapped, it can no longer be considered part of current + * state, remove it to avoid emitting reloc markers. 
+ */ + if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf, + nouveau_bo(pb))) { + so_ref(NULL, &ctx->state.vtxbuf); + ctx->dirty |= NV50_NEW_ARRAYS; + } + + return 0; +} + struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -216,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; + screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); nv50_transfer_init_screen_functions(pscreen); @@ -228,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->m2mf, 1); /* 2D object */ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); @@ -237,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->eng2d, 2); /* 3D object */ switch (chipset & 0xf0) { @@ -273,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) nv50_screen_destroy(pscreen); return NULL; } - BIND_RING(chan, screen->tesla, 3); /* Sync notifier */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); @@ -284,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) } /* Static M2MF init */ - so = so_new(32, 0); + so = so_new(1, 3, 0); so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); @@ -293,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref (NULL, &so); /* Static 2D init */ - so = so_new(64, 0); + so = so_new(4, 7, 0); so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); so_data (so, screen->sync->handle); so_data 
(so, chan->vram->handle); @@ -309,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(256, 20); + so = so_new(40, 84, 20); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 61e24a5b57..a038a4e3c2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -2,6 +2,7 @@ #define __NV50_SCREEN_H__ #include "nouveau/nouveau_screen.h" +#include "nv50_context.h" struct nv50_screen { struct nouveau_screen base; @@ -9,6 +10,7 @@ struct nv50_screen { struct nouveau_winsys *nvws; unsigned cur_pctx; + struct nv50_context *cur_ctx; struct nouveau_grobj *tesla; struct nouveau_grobj *eng2d; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 30b2b0f91b..1f67df814b 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -35,7 +35,7 @@ static void * nv50_blend_state_create(struct pipe_context *pipe, const struct pipe_blend_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(5, 24, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); unsigned cmask = 0, i; @@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, (wrap_mode(cso->wrap_r) << 6)); switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; break; @@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe, } switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_ANISO: case PIPE_TEX_FILTER_LINEAR: tsc[1] |= NV50TSC_1_1_MINF_LINEAR; break; @@ -280,7 +278,7 @@ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct 
pipe_rasterizer_state *cso) { - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(15, 21, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); @@ -425,7 +423,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, { struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); - struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_stateobj *so = so_new(8, 22, 0); so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); so_data (so, cso->depth.writemask ? 1 : 0); diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index c8bdf9dc27..f83232f43c 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -33,7 +33,7 @@ static void nv50_state_validate_fb(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_stateobj *so = so_new(128, 18); + struct nouveau_stateobj *so = so_new(32, 79, 18); struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; @@ -185,6 +185,9 @@ nv50_state_emit(struct nv50_context *nv50) struct nv50_screen *screen = nv50->screen; struct nouveau_channel *chan = screen->base.channel; + /* I don't want to copy headers from the winsys. 
*/ + screen->cur_ctx = nv50; + if (nv50->pctx_id != screen->cur_pctx) { if (nv50->state.fb) nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; @@ -296,7 +299,7 @@ nv50_state_validate(struct nv50_context *nv50) so_ref(nv50->rasterizer->so, &nv50->state.rast); if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { - so = so_new(5, 0); + so = so_new(1, 4, 0); so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); so_data (so, fui(nv50->blend_colour.color[0])); so_data (so, fui(nv50->blend_colour.color[1])); @@ -307,7 +310,7 @@ nv50_state_validate(struct nv50_context *nv50) } if (nv50->dirty & NV50_NEW_STIPPLE) { - so = so_new(33, 0); + so = so_new(1, 32, 0); so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); for (i = 0; i < 32; i++) so_data(so, util_bswap32(nv50->stipple.stipple[i])); @@ -324,7 +327,7 @@ nv50_state_validate(struct nv50_context *nv50) goto scissor_uptodate; nv50->state.scissor_enabled = rast->scissor; - so = so_new(3, 0); + so = so_new(1, 2, 0); so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); @@ -353,7 +356,7 @@ scissor_uptodate: goto viewport_uptodate; nv50->state.viewport_bypass = bypass; - so = so_new(14, 0); + so = so_new(5, 9, 0); if (!bypass) { so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); @@ -397,7 +400,8 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4); + so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES + + nr * 8, PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index c4ca096d6a..bef548b728 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -199,16 +199,18 @@ nv50_tex_validate(struct nv50_context *nv50) { 
struct nouveau_stateobj *so; struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned p, push, nrlc; + unsigned p, start, push, nrlc; - for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) { + start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]); nrlc += nv50->miptree_nr[p]; } - push = push * 11 + 23 * PIPE_SHADER_TYPES + 4; + start = start * 2 + 4 * PIPE_SHADER_TYPES + 2; + push = push * 9 + 19 * PIPE_SHADER_TYPES + 2; nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES; - so = so_new(push, nrlc); + so = so_new(start, push, nrlc); if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE || nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) { diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 602adfc50d..f2e510fba6 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } -boolean +void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { @@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - return ret; + /* XXX: not sure what to do if ret != TRUE: flush and retry? + */ + assert(ret); } static INLINE boolean @@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, return TRUE; } -boolean +void nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) @@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe, OUT_RING (chan, 0); pipe_buffer_unmap(pscreen, indexBuffer); - - return ret; + + /* XXX: what to do if ret != TRUE? Flush and retry? 
+ */ + assert(ret); } static INLINE boolean @@ -350,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so = *pso; if (!so) - *pso = so = so_new(nv50->vtxelt_nr * 5, 0); + *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); switch (ve->nr_components) { case 4: @@ -411,8 +415,8 @@ nv50_vbo_validate(struct nv50_context *nv50) n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4); - vtxfmt = so_new(n_ve + 1, 0); + vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); + vtxfmt = so_new(1, n_ve, 0); so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); for (i = 0; i < nv50->vtxelt_nr; i++) { diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ffe066d536..c14414fff6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -27,9 +27,9 @@ static void r300_blitter_save_states(struct r300_context* r300) { - util_blitter_save_blend(r300->blitter, r300->blend_state); - util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); - util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + util_blitter_save_blend(r300->blitter, r300->blend_state.state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d5c2d63d39..af95bbe789 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_blit.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_flush.h" #include "r300_query.h" #include "r300_render.h" @@ -69,11 +70,13 @@ static void r300_destroy_context(struct pipe_context* 
context) FREE(query); } - FREE(r300->blend_color_state); + FREE(r300->blend_color_state.state); + FREE(r300->clip_state.state); FREE(r300->rs_block); - FREE(r300->scissor_state); + FREE(r300->scissor_state.state); FREE(r300->vertex_info); - FREE(r300->viewport_state); + FREE(r300->viewport_state.state); + FREE(r300->ztop_state.state); FREE(r300); } @@ -107,6 +110,25 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } +#define R300_INIT_ATOM(name) \ + r300->name##_state.state = NULL; \ + r300->name##_state.emit = r300_emit_##name##_state; \ + r300->name##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->name##_state); + +static void r300_setup_atoms(struct r300_context* r300) +{ + make_empty_list(&r300->atom_list); + R300_INIT_ATOM(ztop); + R300_INIT_ATOM(blend); + R300_INIT_ATOM(blend_color); + R300_INIT_ATOM(clip); + R300_INIT_ATOM(dsa); + R300_INIT_ATOM(rs); + R300_INIT_ATOM(scissor); + R300_INIT_ATOM(viewport); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys) { @@ -155,11 +177,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); - r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300_setup_atoms(r300); + + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(r300_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. 
*/ r300->oqbo = screen->buffer_create(screen, 4096, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 232530b7dc..5937f0e2cc 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,9 +30,18 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +struct r300_context; + struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_atom { + struct r300_atom *prev, *next; + void* state; + void (*emit)(struct r300_context*, void*); + boolean dirty; +}; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -62,11 +71,6 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; - /* Whether or not to enable the VTE. This is referenced at the very - * last moment during emission of VTE state, to decide whether or not - * the VTE should be used for transformation. */ - boolean enable_vte; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -105,9 +109,6 @@ struct r300_sampler_state { struct r300_scissor_regs { uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - - /* Whether everything is culled by scissoring. 
*/ - boolean empty_area; }; struct r300_scissor_state { @@ -135,24 +136,17 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND 0x00000001 -#define R300_NEW_BLEND_COLOR 0x00000002 -#define R300_NEW_CLIP 0x00000004 -#define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_SCISSOR 0x00020000 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 -#define R300_NEW_VIEWPORT 0x20000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -273,38 +267,40 @@ struct r300_context { struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ + /* Beginning of atom list. */ + struct r300_atom atom_list; /* Blend state. */ - struct r300_blend_state* blend_state; + struct r300_atom blend_state; /* Blend color state. */ - struct r300_blend_color_state* blend_color_state; + struct r300_atom blend_color_state; /* User clip planes. */ - struct pipe_clip_state clip_state; + struct r300_atom clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ - struct r300_dsa_state* dsa_state; + struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ - struct r300_rs_state* rs_state; + struct r300_atom rs_state; /* RS block state. */ struct r300_rs_block* rs_block; /* Sampler states. 
*/ struct r300_sampler_state* sampler_states[8]; int sampler_count; /* Scissor state. */ - struct r300_scissor_state* scissor_state; + struct r300_atom scissor_state; /* Texture states. */ struct r300_texture* textures[8]; int texture_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ - struct r300_viewport_state* viewport_state; + struct r300_atom viewport_state; /* ZTOP state. */ - struct r300_ztop_state ztop_state; + struct r300_atom ztop_state; /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -317,6 +313,8 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + /* Whether the TCL engine should be in bypass mode. */ + boolean tcl_bypass; /** Combination of DBG_xxx flags */ unsigned debug; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 199ce3a945..0e5533c790 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cs.h" @@ -36,11 +37,12 @@ #include "r300_texture.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend) +void r300_emit_blend_state(struct r300_context* r300, void* state) { + struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); BEGIN_CS(8); + OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (r300->framebuffer_state.nr_cbufs) { OUT_CS(blend->blend_control); @@ -52,14 +54,13 @@ void r300_emit_blend_state(struct r300_context* r300, OUT_CS(0); /* XXX also disable fastfill here once it's supported */ } - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, - struct 
r300_blend_color_state* bc) +void r300_emit_blend_color_state(struct r300_context* r300, void* state) { + struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -76,9 +77,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, } } -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip) +void r300_emit_clip_state(struct r300_context* r300, void* state) { + struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -106,13 +107,13 @@ void r300_emit_clip_state(struct r300_context* r300, } -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa) +void r300_emit_dsa_state(struct r300_context* r300, void* state) { + struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 
8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* not needed since we use the 8bit alpha ref */ @@ -121,10 +122,16 @@ void r300_emit_dsa_state(struct r300_context* r300, }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + + if (r300->framebuffer_state.zsbuf) { + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); + } else { + OUT_CS(0); + OUT_CS(0); + } + OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { @@ -138,6 +145,8 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -160,11 +169,31 @@ static const float * get_shader_constant( /* Texture compare-fail value. */ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0). */ + * this is always (0,0,0,0), right? 
*/ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; + case RC_STATE_R300_VIEWPORT_SCALE: + if (r300->tcl_bypass) { + vec[0] = 1; + vec[1] = 1; + vec[2] = 1; + } else { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; + } + break; + + case RC_STATE_R300_VIEWPORT_OFFSET: + if (!r300->tcl_bypass) { + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; + } + break; + default: debug_printf("r300: Implementation error: " "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); @@ -283,6 +312,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } +static void r300_emit_fragment_depth_config(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + if (r300_fragment_shader_writes_depth(fs)) { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US); + } else { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US); + } + END_CS; +} + void r500_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code) { @@ -531,8 +576,9 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +void r300_emit_rs_state(struct r300_context* r300, void* state) { + struct r300_rs_state* rs = (struct r300_rs_state*)state; CS_LOCALS(r300); BEGIN_CS(22); @@ -607,10 +653,11 @@ static void r300_emit_scissor_regs(struct r300_context* r300, END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { - if (r300->rs_state->rs.scissor) { + struct r300_scissor_state* scissor = (struct r300_scissor_state*)state; + /* XXX argfl! 
*/ + if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { r300_emit_scissor_regs(r300, &scissor->scissor); } else { r300_emit_scissor_regs(r300, &scissor->framebuffer); @@ -867,26 +914,27 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport) +void r300_emit_viewport_state(struct r300_context* r300, void* state) { + struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - - if (r300->rs_state->enable_vte) { - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - } else { + if (r300->tcl_bypass) { + BEGIN_CS(2); OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + END_CS; + } else { + BEGIN_CS(9); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } - END_CS; } void r300_emit_texture_count(struct r300_context* r300) @@ -910,6 +958,16 @@ void r300_emit_texture_count(struct r300_context* r300) } +void r300_emit_ztop_state(struct r300_context* r300, void* state) +{ + struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); + END_CS; +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); @@ -933,13 +991,10 @@ void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; + struct r300_atom* atom; int i, dirty_tex = 0; boolean 
invalid = FALSE; - if (!(r300->dirty_state)) { - return; - } - /* Check size of CS. */ /* Make sure we have at least 8*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to @@ -997,7 +1052,7 @@ validate: goto validate; } } else { - // debug_printf("No VBO while emitting dirty state!\n"); + /* debug_printf("No VBO while emitting dirty state!\n"); */ } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -1015,27 +1070,15 @@ validate: r300->dirty_state &= ~R300_NEW_QUERY; } - if (r300->dirty_state & R300_NEW_BLEND) { - r300_emit_blend_state(r300, r300->blend_state); - r300->dirty_state &= ~R300_NEW_BLEND; - } - - if (r300->dirty_state & R300_NEW_BLEND_COLOR) { - r300_emit_blend_color_state(r300, r300->blend_color_state); - r300->dirty_state &= ~R300_NEW_BLEND_COLOR; - } - - if (r300->dirty_state & R300_NEW_CLIP) { - r300_emit_clip_state(r300, &r300->clip_state); - r300->dirty_state &= ~R300_NEW_CLIP; - } - - if (r300->dirty_state & R300_NEW_DSA) { - r300_emit_dsa_state(r300, r300->dsa_state); - r300->dirty_state &= ~R300_NEW_DSA; + foreach(atom, &r300->atom_list) { + if (atom->dirty) { + atom->emit(r300, atom->state); + atom->dirty = FALSE; + } } if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + r300_emit_fragment_depth_config(r300, r300->fs); if (r300screen->caps->is_r500) { r500_emit_fragment_program_code(r300, &r300->fs->shader->code); } else { @@ -1060,21 +1103,11 @@ validate: r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } - if (r300->dirty_state & R300_NEW_RASTERIZER) { - r300_emit_rs_state(r300, r300->rs_state); - r300->dirty_state &= ~R300_NEW_RASTERIZER; - } - if (r300->dirty_state & R300_NEW_RS_BLOCK) { r300_emit_rs_block_state(r300, r300->rs_block); r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_NEW_SCISSOR) { - r300_emit_scissor_state(r300, r300->scissor_state); - r300->dirty_state &= ~R300_NEW_SCISSOR; - } - /* Samplers and textures are tracked 
separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1096,11 +1129,6 @@ validate: r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } - if (r300->dirty_state & R300_NEW_VIEWPORT) { - r300_emit_viewport_state(r300, r300->viewport_state); - r300->dirty_state &= ~R300_NEW_VIEWPORT; - } - if (dirty_tex) { r300_flush_textures(r300); } @@ -1129,7 +1157,7 @@ validate: */ /* Finally, emit the VBO. */ - //r300_emit_vertex_buffer(r300); + /* r300_emit_vertex_buffer(r300); */ r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3797d3d332..05a6bfeae8 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,17 +31,13 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend); +void r300_emit_blend_state(struct r300_context* r300, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc); +void r300_emit_blend_color_state(struct r300_context* r300, void* state); -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip); +void r300_emit_clip_state(struct r300_context* r300, void* state); -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa); +void r300_emit_dsa_state(struct r300_context* r300, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -63,13 +59,12 @@ void r300_emit_query_begin(struct r300_context* r300, void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); +void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct 
r300_rs_block* rs); -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor); +void r300_emit_scissor_state(struct r300_context* r300, void* state); void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, @@ -89,11 +84,12 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport); +void r300_emit_viewport_state(struct r300_context* r300, void* state); void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, void* state); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4e1b61ca40..60ea9c171d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->fog = i; break; + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + fs_inputs->wpos = i; + break; + default: assert(0); } @@ -114,6 +119,9 @@ static void allocate_hardware_inputs( if (inputs->fog != ATTR_UNUSED) { allocate(mydata, inputs->fog, reg++); } + if (inputs->wpos != ATTR_UNUSED) { + allocate(mydata, inputs->wpos, reg++); + } } static void get_compare_state( @@ -144,6 +152,7 @@ static void r300_translate_fragment_shader( struct r300_fragment_shader* fs = r300->fs; struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + int wpos = fs->inputs.wpos; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -171,6 +180,18 @@ static void r300_translate_fragment_shader( fs->shadow_samplers = compiler.Base.Program.ShadowSamplers; + /** + * Transform the program to support WPOS. 
+ * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. */ + if (wpos != ATTR_UNUSED) { + /* Moving the input to some other reg is not really necessary. */ + rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d8d08fbe26..034bfc15cf 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2186,6 +2186,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) +# define R500_SRC_ALPHA_0_NO_READ (1 << 30) +# define R500_SRC_ALPHA_1_NO_READ (1 << 31) /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) @@ -2638,7 +2640,7 @@ enum { VE_COND_MUX_GTE = 25, VE_SET_GREATER_THAN = 26, VE_SET_EQUAL = 27, - VE_SET_NOT_EQUAL = 28, + VE_SET_NOT_EQUAL = 28 }; enum { @@ -2672,20 +2674,20 @@ enum { ME_PRED_SET_CLR = 25, ME_PRED_SET_INV = 26, ME_PRED_SET_POP = 27, - ME_PRED_SET_RESTORE = 28, + ME_PRED_SET_RESTORE = 28 }; enum { /* R3XX */ PVS_MACRO_OP_2CLK_MADD = 0, - PVS_MACRO_OP_2CLK_M2X_ADD = 1, + PVS_MACRO_OP_2CLK_M2X_ADD = 1 }; enum { PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ - PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */ }; enum { @@ -2694,7 +2696,7 @@ enum { PVS_DST_REG_OUT = 2, /* Output Memory. 
Used for all outputs */ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ - PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */ }; enum { @@ -2703,7 +2705,7 @@ enum { PVS_SRC_SELECT_Z = 2, /* Select Z Component */ PVS_SRC_SELECT_W = 3, /* Select W Component */ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ - PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ + PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */ }; /* PVS Opcode & Destination Operand Description */ @@ -2742,7 +2744,7 @@ enum { PVS_DST_ADDR_SEL_MASK = 0x3, PVS_DST_ADDR_SEL_SHIFT = 29, PVS_DST_ADDR_MODE_0_MASK = 0x1, - PVS_DST_ADDR_MODE_0_SHIFT = 31, + PVS_DST_ADDR_MODE_0_SHIFT = 31 }; /* PVS Source Operand Description */ @@ -2777,7 +2779,7 @@ enum { PVS_SRC_ADDR_SEL_MASK = 0x3, PVS_SRC_ADDR_SEL_SHIFT = 29, PVS_SRC_ADDR_MODE_1_MASK = 0x0, - PVS_SRC_ADDR_MODE_1_SHIFT = 32, + PVS_SRC_ADDR_MODE_1_SHIFT = 32 }; /*\}*/ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index a89cb633e0..ee43421cdb 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -69,16 +69,11 @@ uint32_t r300_translate_primitive(unsigned prim) } } -static boolean r300_nothing_to_draw(struct r300_context *r300) -{ - return r300->rs_state->rs.scissor && - r300->scissor_state->scissor.empty_area; -} - static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { - uint32_t color_control = r300->rs_state->color_control; + struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; + uint32_t color_control = rs->color_control; /* By default (see r300_state.c:r300_create_rs_state) color_control is * initialized to provoking the first vertex. 
@@ -98,7 +93,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, * ~ C. */ - if (r300->rs_state->rs.flatshade_first) { + if (rs->rs.flatshade_first) { switch (mode) { case PIPE_PRIM_TRIANGLE_FAN: color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; @@ -213,7 +208,7 @@ validate: } /* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, +void r300_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -225,30 +220,29 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: use aux/indices functions to split this into smaller + * primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return FALSE; + return; } if (!r300->winsys->validate(r300->winsys)) { - return FALSE; + return; } r300_emit_dirty_state(r300); @@ -257,41 +251,38 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - - return TRUE; } /* Simple helpers for context setup. Should probably be moved to util. 
*/ -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) { - return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); } -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: driver needs to handle this -- use the functions in + * aux/indices to split this into several smaller primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } r300_emit_dirty_state(r300); @@ -299,8 +290,6 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_emit_aos(r300, start); r300_emit_draw_arrays(r300, mode, count); - - return TRUE; } /**************************************************************************** @@ -309,7 +298,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, ***************************************************************************/ /* SW TCL arrays, using Draw. 
*/ -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, +void r300_swtcl_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) @@ -318,11 +307,7 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, int i; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -346,12 +331,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - - return TRUE; } /* SW TCL elements, using Draw. */ -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -365,11 +348,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, void* indices; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -400,8 +379,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); - - return TRUE; } /* Object for rendering using Draw. 
*/ diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index da83069083..27b5e6a963 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,35 +25,35 @@ uint32_t r300_translate_primitive(unsigned prim); -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count); - -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count); - -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); #endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c index 2a8667d483..287664b1d2 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: @@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_SM3: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } default: debug_printf("r300: Implementation error: Bad param %d\n", param); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index 85184e2cfd..6796841b29 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -40,6 +40,7 @@ struct r300_shader_semantics { int bcolor[ATTR_COLOR_COUNT]; int generic[ATTR_GENERIC_COUNT]; int fog; + int wpos; }; static INLINE void r300_shader_semantics_reset( @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; info->fog = ATTR_UNUSED; + info->wpos = ATTR_UNUSED; for (i = 0; i < ATTR_COLOR_COUNT; i++) { info->color[i] = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 49072462ec..78764ddc98 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,6 +42,120 @@ /* 
r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 0, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 1, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer + * will not be changed. 
+ * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. 
*/ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. */ @@ -66,7 +181,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe, ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); - /* optimization: some operations do not require the destination color */ + /* Optimization: some operations do not require the destination color. 
+ * + * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled, + * otherwise blending gives incorrect results. It seems to be + * a hardware bug. */ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || dstRGB != PIPE_BLENDFACTOR_ZERO || @@ -78,11 +197,81 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_DST_COLOR || srcA == PIPE_BLENDFACTOR_DST_ALPHA || srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || - srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { + /* Enable reading from the colorbuffer. */ blend->blend_control |= R300_READ_ENABLE; - /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ - /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) { + /* Optimization: Depending on incoming pixels, we can + * conditionally disable the reading in hardware... */ + if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN && + eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) { + /* Disable reading if SRC_ALPHA == 0. */ + if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + } + + /* Disable reading if SRC_ALPHA == 1. */ + if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + } + } + } + } + + /* Optimization: discard pixels which don't change the colorbuffer. + * + * The code below is non-trivial and some math is involved. + * + * Discarding pixels must be disabled when FP16 AA is enabled. 
+ * This is a hardware bug. Also, this implementation wouldn't work + * with FP blending enabled and equation clamping disabled. + * + * Equations other than ADD are rarely used and therefore won't be + * optimized. */ + if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && + (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { + /* ADD: X+Y + * REVERSE_SUBTRACT: Y-X + * + * The idea is: + * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1, + * then CB will not be changed. + * + * Given the srcFactor and dstFactor variables, we can derive + * what src and dst should be equal to and discard appropriate + * pixels. + */ + if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + } else if (blend_discard_if_src_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + } else if (blend_discard_if_src_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; + } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; + } + } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { @@ -128,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state = (struct r300_blend_state*)state; - r300->dirty_state |= R300_NEW_BLEND; + r300->blend_state.state = state; + r300->blend_state.dirty = TRUE; } /* Free blend state. 
*/ @@ -151,20 +340,22 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_blend_color_state* state = + (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); - r300->blend_color_state->blend_color = uc.ui; + state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ - r300->blend_color_state->blend_color_red_alpha = + state->blend_color_red_alpha = float_to_fixed10(color->color[0]) | (float_to_fixed10(color->color[3]) << 16); - r300->blend_color_state->blend_color_green_blue = + state->blend_color_green_blue = float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); - r300->dirty_state |= R300_NEW_BLEND_COLOR; + r300->blend_color_state.dirty = TRUE; } static void r300_set_clip_state(struct pipe_context* pipe, @@ -173,8 +364,8 @@ static void r300_set_clip_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (r300_screen(pipe->screen)->caps->has_tcl) { - r300->clip_state = *state; - r300->dirty_state |= R300_NEW_CLIP; + memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); + r300->clip_state.dirty = TRUE; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); @@ -272,8 +463,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->dsa_state = (struct r300_dsa_state*)state; - r300->dirty_state |= R300_NEW_DSA; + r300->dsa_state.state = state; + r300->dsa_state.dirty = TRUE; } /* Free DSA state. 
*/ @@ -303,9 +494,6 @@ static void r300_set_scissor_regs(const struct pipe_scissor_state* state, (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } - - scissor->empty_area = state->minx >= state->maxx || - state->miny >= state->maxy; } static void @@ -313,7 +501,9 @@ static void const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct pipe_scissor_state scissor; + struct r300_scissor_state* scissor = + (struct r300_scissor_state*)r300->scissor_state.state; + struct pipe_scissor_state pscissor; if (r300->draw) { draw_flush(r300->draw); @@ -321,18 +511,19 @@ static void r300->framebuffer_state = *state; - scissor.minx = scissor.miny = 0; - scissor.maxx = state->width; - scissor.maxy = state->height; - r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, + /* XXX Arg. This is silly. */ + pscissor.minx = pscissor.miny = 0; + pscissor.maxx = state->width; + pscissor.maxy = state->height; + r300_set_scissor_regs(&pscissor, &scissor->framebuffer, r300_screen(r300->context.screen)->caps->is_r500); /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || !r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_BLEND; + + r300->blend_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -367,6 +558,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); + if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -407,8 +602,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. 
*/ rs->rs = *state; - rs->enable_vte = !state->bypass_vs_clip_and_viewport; - #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; #else @@ -524,12 +717,19 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->rs_state = rs; + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + + r300->rs_state.state = rs; + r300->rs_state.dirty = TRUE; + /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; - r300->dirty_state |= R300_NEW_SCISSOR; - r300->dirty_state |= R300_NEW_VIEWPORT; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } /* Free rasterizer state. */ @@ -556,7 +756,8 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, - state->min_mip_filter); + state->min_mip_filter, + state->max_anisotropy > 1.0); /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ /* We must pass these to the emit function to clamp them properly. */ @@ -663,50 +864,54 @@ static void r300_set_scissor_state(struct pipe_context* pipe, const struct pipe_scissor_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_scissor_state* scissor = + (struct r300_scissor_state*)r300->scissor_state.state; - r300_set_scissor_regs(state, &r300->scissor_state->scissor, + r300_set_scissor_regs(state, &scissor->scissor, r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. 
*/ - if (!r300->rs_state || r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + viewport->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + viewport->xscale = state->scale[0]; + viewport->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + viewport->yscale = state->scale[1]; + viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + viewport->zscale = state->scale[2]; + viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; } if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + viewport->xoffset = state->translate[0]; + viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; } if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + viewport->yoffset = state->translate[1]; + viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; } if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; + viewport->zoffset = state->translate[2]; + viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - 
r300->dirty_state |= R300_NEW_VIEWPORT; + r300->viewport_state.dirty = TRUE; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } static void r300_set_vertex_buffers(struct pipe_context* pipe, @@ -778,7 +983,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + + r300->dirty_state |= + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | + R300_NEW_VERTEX_FORMAT; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 727ae7ade6..192846411b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (r300->rs_state->enable_vte) { - stream_tab = identity; - } else { + if (r300->tcl_bypass) { stream_tab = r300->vs->stream_loc_notcl; + } else { + stream_tab = identity; } /* Vertex shaders have no semantics on their inputs, @@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300, void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; if (r300_screen(r300->context.screen)->caps->is_r500) { rX00_rs_col = r500_rs_col; @@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); @@ -410,6 +412,16 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Rasterize WPOS. */ + /* If the FS doesn't need it, it's not written by the VS. */ + if (fs_inputs->wpos != ATTR_UNUSED) { + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(rs, tex_count, fp_offset); + + fp_offset++; + tex_count++; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { rX00_rs_col(rs, 0, 0, TRUE); @@ -496,7 +508,8 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; /* This is important enough that I felt it warranted a comment. * @@ -518,31 +531,37 @@ static void r300_update_ztop(struct r300_context* r300) * 5) Depth writes in fragment shader * 6) Outstanding occlusion queries * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * * ~C. 
*/ /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ - r300->fs->info.uses_kill)) { /* (2) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } + + r300->ztop_state.dirty = TRUE; } void r300_update_derived_state(struct r300_context* r300) { + /* XXX */ if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT)) { + R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } - if (r300->dirty_state & - (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { - r300_update_ztop(r300); - } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index dbe42edd91..35be00e1b0 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -257,38 +257,37 @@ static INLINE uint32_t r300_translate_wrap(int wrap) } } -static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) +static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip, + int is_anisotropic) { uint32_t retval = 0; - switch (min) { + if (is_anisotropic) + retval |= 
R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; + else { + switch (min) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MIN_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MIN_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", min); assert(0); break; - } - switch (mag) { + } + switch (mag) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MAG_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MAG_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", mag); assert(0); break; + } } switch (mip) { case PIPE_TEX_MIPFILTER_NONE: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index bcd4c030f9..f25f3ca217 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(20 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(16 + (caps->has_tcl ? 
2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); - OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Sign/normalize control */ @@ -118,8 +116,8 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); if (caps->is_r500) { - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 096cdb20bb..a792c2cf98 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index c4ed0d712f..68aef70872 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "r300_vs.h" +#include "r300_fs.h" #include "r300_context.h" #include "r300_screen.h" @@ -33,6 +34,8 @@ #include "radeon_compiler.h" +#include "util/u_math.h" + /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -88,11 +91,13 @@ static void r300_shader_read_vs_outputs( } } -static void r300_shader_vap_output_fmt( - struct r300_shader_semantics* vs_outputs, - uint* hwfmt) +static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) { + struct r300_shader_semantics* vs_outputs = &vs->outputs; + uint32_t* hwfmt = vs->hwfmt; int i, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Do the actual vertex_info setup. * @@ -119,13 +124,19 @@ static void r300_shader_vap_output_fmt( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + } + } /* Texture coordinates. */ gen_count = 0; @@ -146,6 +157,9 @@ static void r300_shader_vap_output_fmt( /* XXX magic */ assert(gen_count <= 8); + + /* WPOS. */ + vs->wpos_tex_output = gen_count; } /* Sets up stream mapping to equivalent VS outputs if TCL is bypassed @@ -155,6 +169,8 @@ static void r300_stream_locations_notcl( int* stream_loc) { int i, tabi = 0, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Position. */ stream_loc[tabi++] = 0; @@ -166,14 +182,14 @@ static void r300_stream_locations_notcl( /* Colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { stream_loc[tabi++] = 2 + i; } } /* Back-face colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { stream_loc[tabi++] = 4 + i; } } @@ -181,7 +197,7 @@ static void r300_stream_locations_notcl( /* Texture coordinates. */ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -195,8 +211,12 @@ static void r300_stream_locations_notcl( gen_count++; } - /* XXX magic */ - assert(gen_count <= 8); + /* WPOS. */ + if (vs_outputs->wpos != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -209,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) struct r300_shader_semantics* outputs = &vs->outputs; struct tgsi_shader_info* info = &vs->info; int i, reg = 0; + boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED || + outputs->bcolor[1] != ATTR_UNUSED; /* Fill in the input mapping */ for (i = 0; i < info->num_inputs; i++) @@ -226,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) c->code->outputs[outputs->psize] = reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + /* Colors. 
*/ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; + } else if (any_bcolor_used) { + reg++; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->bcolor[i] != ATTR_UNUSED) { + c->code->outputs[outputs->bcolor[i]] = reg++; + } else if (any_bcolor_used) { + reg++; + } + } /* Texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT; i++) { @@ -246,6 +284,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (outputs->fog != ATTR_UNUSED) { c->code->outputs[outputs->fog] = reg++; } + + /* WPOS. */ + if (outputs->wpos != ATTR_UNUSED) { + c->code->outputs[outputs->wpos] = reg++; + } +} + +static void r300_insert_wpos(struct r300_vertex_program_compiler* c, + struct r300_shader_semantics* outputs) +{ + int i, lastOutput = 0; + + /* Find the max output index. */ + lastOutput = MAX2(lastOutput, outputs->psize); + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->color[i]); + lastOutput = MAX2(lastOutput, outputs->bcolor[i]); + } + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->generic[i]); + } + lastOutput = MAX2(lastOutput, outputs->fog); + + /* Set WPOS after the last output. */ + lastOutput++; + rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ + outputs->wpos = lastOutput; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -256,8 +321,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Initialize. 
*/ r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); /* Setup the compiler */ rc_init(&compiler.Base); @@ -277,9 +340,15 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); - compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; + /* Insert the WPOS output. */ + r300_insert_wpos(&compiler, &vs->outputs); + + r300_shader_vap_output_fmt(vs); + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { @@ -292,3 +361,30 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } + +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs; + int tex_output = r300->vs->wpos_tex_output; + uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; + uint32_t* hwfmt = vs->hwfmt; + + if (r300->fs->inputs.wpos != ATTR_UNUSED) { + /* Enable WPOS in VAP. */ + if (!(hwfmt[1] & tex_fmt)) { + hwfmt[1] |= tex_fmt; + hwfmt[3] |= (4 << (3 * tex_output)); + + assert(tex_output < 8); + return TRUE; + } + } else { + /* Disable WPOS in VAP. */ + if (hwfmt[1] & tex_fmt) { + hwfmt[1] &= ~tex_fmt; + hwfmt[3] &= ~(4 << (3 * tex_output)); + return TRUE; + } + } + return FALSE; +} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 67e9db5366..18cfeee3cd 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -43,6 +43,9 @@ struct r300_vertex_shader { /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; + /* Output stream location for WPOS. 
*/ + int wpos_tex_output; + /* Has this shader been translated yet? */ boolean translated; @@ -53,4 +56,7 @@ struct r300_vertex_shader { void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); +/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); + #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index f98087deb8..5f130453c3 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,6 +36,7 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_tile_cache.h" @@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (softpipe->no_rast) return; + if (!softpipe_check_render_cond(softpipe)) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 82173a3c2a..f3ac6760db 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, } +static void +softpipe_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + + softpipe->render_cond_query = query; + softpipe->render_cond_mode = mode; +} + + + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -252,6 +265,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe_init_query_funcs( softpipe ); + softpipe->pipe.render_condition = softpipe_render_condition; + /* * Alloc caches for accessing drawing surfaces and textures. * Must be before quad stage setup! 
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 6a89bd4b06..73fa744f9d 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -116,6 +116,10 @@ struct softpipe_context { unsigned line_stipple_counter; + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 9ea5d6fb9f..03d35fb3cb 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,6 +38,7 @@ #include "util/u_prim.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_state.h" #include "draw/draw_context.h" @@ -97,11 +98,11 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } -boolean +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); + softpipe_draw_elements(pipe, NULL, 0, mode, start, count); } @@ -110,7 +111,7 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * Basically, map the vertex buffers (and drawing surfaces), then hand off * the drawing to the 'draw' module. 
*/ -boolean +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -122,6 +123,9 @@ softpipe_draw_range_elements(struct pipe_context *pipe, struct draw_context *draw = sp->draw; unsigned i; + if (!softpipe_check_render_cond(sp)) + return; + sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) @@ -177,19 +181,17 @@ softpipe_draw_range_elements(struct pipe_context *pipe, softpipe_unmap_constant_buffers(sp); sp->dirty_render_cache = TRUE; - - return TRUE; } -boolean +void softpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - return softpipe_draw_range_elements( pipe, indexBuffer, - indexSize, - 0, 0xffffffff, - mode, start, count ); + softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); } diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 379cf4ad06..4ef5d9f7b1 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe, } +/** + * Called by rendering function to check rendering is conditional. 
+ * \return TRUE if we should render, FALSE if we should skip rendering + */ +boolean +softpipe_check_render_cond(struct softpipe_context *sp) +{ + struct pipe_context *pipe = &sp->pipe; + boolean b, wait; + uint64_t result; + + if (!sp->render_cond_query) { + return TRUE; /* no query predicate, draw normally */ + } + + wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT || + sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result); + if (b) + return result > 0; + else + return TRUE; +} + + void softpipe_init_query_funcs(struct softpipe_context *softpipe ) { softpipe->pipe.create_query = softpipe_create_query; diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h index 05060a4575..736c033897 100644 --- a/src/gallium/drivers/softpipe/sp_query.h +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -32,6 +32,10 @@ #ifndef SP_QUERY_H #define SP_QUERY_H +extern boolean +softpipe_check_render_cond(struct softpipe_context *sp); + + struct softpipe_context; extern void softpipe_init_query_funcs(struct softpipe_context * ); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 5a32d211d6..9b18dac67b 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -184,14 +184,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *, void softpipe_update_derived( struct softpipe_context *softpipe ); -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); -boolean +void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +void softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + 
unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +void softpipe_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index e26153b1d9..1ae8fecacf 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2,7 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * Copyright 2008 VMware, Inc. All rights reserved. + * Copyright 2008-2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -514,21 +514,15 @@ static float compute_lambda_1d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]); float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]); float rho = MAX2(dsdx, dsdy) * texture->width0; - float lambda; - - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - return lambda; + return util_fast_log2(rho); } @@ -536,8 +530,7 @@ static float compute_lambda_2d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -548,13 +541,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float rho = 
MAX2(maxx, maxy); - float lambda; - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } @@ -562,8 +550,7 @@ static float compute_lambda_3d(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { const struct pipe_texture *texture = samp->texture; const struct pipe_sampler_state *sampler = samp->sampler; @@ -576,31 +563,26 @@ compute_lambda_3d(const struct sp_sampler_varient *samp, float maxx = MAX2(dsdx, dsdy) * texture->width0; float maxy = MAX2(dtdx, dtdy) * texture->height0; float maxz = MAX2(dpdx, dpdy) * texture->depth0; - float rho, lambda; + float rho; rho = MAX2(maxx, maxy); rho = MAX2(rho, maxz); - lambda = util_fast_log2(rho); - lambda += lodbias + sampler->lod_bias; - lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); - - return lambda; + return util_fast_log2(rho); } /** * Compute lambda for a vertex texture sampler. - * Since there aren't derivatives to use, just return the LOD bias. + * Since there aren't derivatives to use, just return 0. 
*/ static float compute_lambda_vert(const struct sp_sampler_varient *samp, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias) + const float p[QUAD_SIZE]) { - return lodbias; + return 0.0f; } @@ -769,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -827,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -866,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -914,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -949,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = 
sp_sampler_varient(tgsi_sampler); @@ -996,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1035,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1076,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1115,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1161,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1209,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], 
+ enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1261,29 +1254,60 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler, } +/* Calculate level of detail for every fragment. + * Note that lambda has already been biased by global LOD bias. + */ +static INLINE void +compute_lod(const struct pipe_sampler_state *sampler, + const float biased_lambda, + const float lodbias[QUAD_SIZE], + float lod[QUAD_SIZE]) +{ + uint i; + + for (i = 0; i < QUAD_SIZE; i++) { + lod[i] = biased_lambda + lodbias[i]; + lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod); + } +} + + static void mip_filter_linear(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; level0 = (int)lambda; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else if (level0 >= texture->last_level) { samp->level = texture->last_level; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1292,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, int c,j; samp->level = level0; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1311,23 +1335,36 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; float lambda; + float lod[QUAD_SIZE]; - lambda = samp->compute_lambda(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; if (lambda < 0.0) { samp->level = 0; - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { samp->level = (int)(lambda + 0.5) ; samp->level = MIN2(samp->level, (int)texture->last_level); - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } #if 0 @@ -1345,17 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); - float lambda = samp->compute_lambda(samp, s, t, p, lodbias); + float lambda; + float lod[QUAD_SIZE]; + + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; if (lambda < 0.0) { - samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { - samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba ); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } } @@ -1371,15 +1423,28 @@ mip_filter_linear_2d_linear_repeat_POT( const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); const struct pipe_texture *texture = samp->texture; int level0; float lambda; + float lod[QUAD_SIZE]; - lambda = compute_lambda_2d(samp, s, t, p, lodbias); + if (control == tgsi_sampler_lod_bias) { + lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. 
+ */ + lambda = lod[0]; level0 = (int)lambda; /* Catches both negative and large values of level0: @@ -1390,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT( else samp->level = texture->last_level; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } else { float levelBlend = lambda - level0; @@ -1399,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT( int c,j; samp->level = level0; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0); samp->level = level0+1; - img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 ); + img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1); for (j = 0; j < QUAD_SIZE; j++) { for (c = 0; c < 4; c++) { @@ -1422,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1430,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler, int j, k0, k1, k2, k3; float val; - samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba ); + samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba); /** * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' @@ -1508,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]) { struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); @@ -1589,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, * is not active, this will point somewhere 
deeper into the * pipeline, eg. to mip_filter or even img_filter. */ - samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba); + samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba); } @@ -1862,7 +1929,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler, break; } - if (sampler->compare_mode != FALSE) { + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { samp->compare = sample_compare; } else { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index b0797711d3..b6e66c998a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -2,6 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2010 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -46,14 +47,14 @@ typedef void (*wrap_linear_func)(const float s[4], typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias); + const float p[QUAD_SIZE]); typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], - float lodbias, + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, float rgba[NUM_CHANNELS][QUAD_SIZE]); diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index c3de12b4a3..af99c9de37 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -29,6 +29,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "util/u_upload_mgr.h" #include "svga_context.h" @@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe ) 
u_upload_destroy( svga->upload_vb ); u_upload_destroy( svga->upload_ib ); + util_bitmask_destroy( svga->vs_bm ); + util_bitmask_destroy( svga->fs_bm ); + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) pipe_buffer_reference( &svga->curr.cb[shader], NULL ); @@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga = CALLOC_STRUCT(svga_context); if (svga == NULL) - goto error1; + goto no_svga; svga->pipe.winsys = screen->winsys; svga->pipe.screen = screen; @@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->swc = svgascreen->sws->context_create(svgascreen->sws); if(!svga->swc) - goto error2; + goto no_swc; svga_init_blend_functions(svga); svga_init_blit_functions(svga); @@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); if (!svga_init_swtnl(svga)) - goto error3; + goto no_swtnl; + + svga->fs_bm = util_bitmask_create(); + if (svga->fs_bm == NULL) + goto no_fs_bm; + + svga->vs_bm = util_bitmask_create(); + if (svga->vs_bm == NULL) + goto no_vs_bm; svga->upload_ib = u_upload_create( svga->pipe.screen, 32 * 1024, 16, PIPE_BUFFER_USAGE_INDEX ); if (svga->upload_ib == NULL) - goto error4; + goto no_upload_ib; svga->upload_vb = u_upload_create( svga->pipe.screen, 128 * 1024, 16, PIPE_BUFFER_USAGE_VERTEX ); if (svga->upload_vb == NULL) - goto error5; + goto no_upload_vb; svga->hwtnl = svga_hwtnl_create( svga, svga->upload_ib, svga->swc ); if (svga->hwtnl == NULL) - goto error6; + goto no_hwtnl; ret = svga_emit_initial_state( svga ); if (ret) - goto error7; + goto no_state; /* Avoid shortcircuiting state with initial value of zero. 
*/ @@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) return &svga->pipe; -error7: +no_state: svga_hwtnl_destroy( svga->hwtnl ); -error6: +no_hwtnl: u_upload_destroy( svga->upload_vb ); -error5: +no_upload_vb: u_upload_destroy( svga->upload_ib ); -error4: +no_upload_ib: + util_bitmask_destroy( svga->vs_bm ); +no_vs_bm: + util_bitmask_destroy( svga->fs_bm ); +no_fs_bm: svga_destroy_swtnl(svga); -error3: +no_swtnl: svga->swc->destroy(svga->swc); -error2: +no_swc: FREE(svga); -error1: +no_svga: return NULL; } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0885d9ca74..66259fd010 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -41,6 +41,7 @@ struct draw_vertex_shader; struct svga_shader_result; struct SVGACmdMemory; +struct util_bitmask; struct u_upload_mgr; @@ -265,8 +266,6 @@ struct svga_hw_draw_state unsigned ts[16][TS_MAX]; float cb[PIPE_SHADER_TYPES][CB_MAX][4]; - unsigned shader_id[PIPE_SHADER_TYPES]; - struct svga_shader_result *fs; struct svga_shader_result *vs; struct svga_hw_view_state views[PIPE_MAX_SAMPLERS]; @@ -319,12 +318,14 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of used shader IDs */ + struct util_bitmask *fs_bm; + struct util_bitmask *vs_bm; + struct { unsigned dirty[4]; unsigned texture_timestamp; - unsigned next_fs_id; - unsigned next_vs_id; /* Internally generated shaders: */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 8db40d0fd5..ca73cf9d5a 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + svga->curr.framebuffer.cbufs[0] ? 
+ svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, hwtnl->cmd.prim_count); ret = SVGA3D_BeginDrawPrimitives(swc, diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c index 71a552862e..0f24ef4ee8 100644 --- a/src/gallium/drivers/svga/svga_pipe_draw.c +++ b/src/gallium/drivers/svga/svga_pipe_draw.c @@ -149,7 +149,7 @@ retry: -static boolean +static void svga_draw_range_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, @@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe, enum pipe_error ret = 0; if (!u_trim_pipe_prim( prim, &count )) - return TRUE; + return; /* * Mark currently bound target surfaces as dirty @@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe, #ifdef DEBUG if (svga->curr.vs->base.id == svga->debug.disable_shader || svga->curr.fs->base.id == svga->debug.disable_shader) - return 0; + return; #endif if (svga->state.sw.need_swtnl) @@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe, svga_hwtnl_flush_retry( svga ); svga_context_flush(svga, NULL); } - - return ret == PIPE_OK; } -static boolean +static void svga_draw_elements( struct pipe_context *pipe, struct pipe_buffer *index_buffer, unsigned index_size, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements( pipe, index_buffer, - index_size, - 0, 0xffffffff, - prim, start, count ); + svga_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + prim, start, count ); } -static boolean +static void svga_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) { - return svga_draw_range_elements(pipe, NULL, 0, - start, start + count - 1, - prim, - start, count); + svga_draw_range_elements(pipe, NULL, 0, + start, start + count - 1, + prim, + start, count); } diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index 
e3be840d92..5f1213e46a 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -107,7 +108,16 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->fs_bm, result->id ); + svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.fs) + svga->state.hw_draw.fs = NULL; } FREE((void *)fs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 78053e755e..460a101f8c 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter ) switch (filter) { case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return SVGA3D_TEX_FILTER_LINEAR; - case PIPE_TEX_FILTER_ANISO: return SVGA3D_TEX_FILTER_ANISOTROPIC; default: assert(0); return SVGA3D_TEX_FILTER_NEAREST; @@ -107,6 +106,8 @@ svga_create_sampler_state(struct pipe_context *pipe, cso->magfilter = translate_img_filter( sampler->mag_img_filter ); cso->minfilter = translate_img_filter( sampler->min_img_filter ); cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 ); + if(cso->aniso_level != 1) + cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC; cso->lod_bias = sampler->lod_bias; cso->addressu = translate_wrap_mode(sampler->wrap_s); cso->addressv = translate_wrap_mode(sampler->wrap_t); diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index c104c41f5f..7e6ab576ad 100644 --- 
a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -172,7 +173,16 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->vs_bm, result->id ); + svga_destroy_shader_result( result ); + + /* + * Remove stale references to this result to ensure a new result on the + * same address will be detected as a change. + */ + if(result == svga->state.hw_draw.vs) + svga->state.hw_draw.vs = NULL; } FREE((void *)vs->base.tokens); diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 6ec38ed3e4..d29f3762d2 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "svga_context.h" #include "svga_state.h" @@ -39,8 +40,13 @@ static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a, const struct svga_fs_compile_key *b ) { - unsigned keysize = svga_fs_key_size( a ); - return memcmp( a, b, keysize ); + unsigned keysize_a = svga_fs_key_size( a ); + unsigned keysize_b = svga_fs_key_size( b ); + + if (keysize_a != keysize_b) { + return (int)(keysize_a - keysize_b); + } + return memcmp( a, b, keysize_a ); } @@ -66,7 +72,7 @@ static enum pipe_error compile_fs( struct svga_context *svga, struct svga_shader_result **out_result ) { struct svga_shader_result *result; - enum pipe_error ret; + enum pipe_error ret = PIPE_ERROR; result = svga_translate_fragment_program( fs, key ); if (result == NULL) { @@ -74,9 +80,12 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->fs_bm); + if(result->id == 
UTIL_BITMASK_INVALID_INDEX) + goto fail; ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + result->id, SVGA3D_SHADERTYPE_PS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -84,14 +93,16 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_fs_id++; result->next = fs->base.results; fs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->fs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -116,7 +127,7 @@ fail: */ static int emit_white_fs( struct svga_context *svga ) { - int ret; + int ret = PIPE_ERROR; /* ps_3_0 * def c0, 1.000000, 0.000000, 0.000000, 1.000000 @@ -137,16 +148,26 @@ static int emit_white_fs( struct svga_context *svga ) 0x0000ffff, }; + assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); + svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); + if(svga->state.white_fs_id == SVGA3D_INVALID_ID) + goto no_fs_id; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + svga->state.white_fs_id, SVGA3D_SHADERTYPE_PS, white_tokens, sizeof(white_tokens)); if (ret) - return ret; + goto no_definition; - svga->state.white_fs_id = svga->state.next_fs_id++; return 0; + +no_definition: + util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); + svga->state.white_fs_id = SVGA3D_INVALID_ID; +no_fs_id: + return ret; } @@ -251,15 +272,14 @@ static int emit_hw_fs( struct svga_context *svga, assert(id != SVGA3D_INVALID_ID); - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_PS, + if (result != svga->state.hw_draw.fs) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, id ); if (ret) return ret; svga->dirty |= SVGA_NEW_FS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; svga->state.hw_draw.fs = result; } diff --git 
a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 44b7ceb4fa..ae1e77e7d4 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "translate/translate.h" #include "svga_context.h" @@ -78,8 +79,12 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->vs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_vs_id, + result->id, SVGA3D_SHADERTYPE_VS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -87,14 +92,16 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_vs_id++; result->next = vs->base.results; vs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->vs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -142,15 +149,14 @@ static int emit_hw_vs( struct svga_context *svga, id = result->id; } - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_VS, + if (result != svga->state.hw_draw.vs) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, id ); if (ret) return ret; svga->dirty |= SVGA_NEW_VS_RESULT; - svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; svga->state.hw_draw.vs = result; } diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index b8ef137c01..0cd620189b 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" +#include 
"util/u_bitmask.h" #include "svgadump/svga_shader_dump.h" @@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader, result->tokens = (const unsigned *)emit.buf; result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); memcpy(&result->key, &key, sizeof key); + result->id = UTIL_BITMASK_INVALID_INDEX; if (SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 896c90a89a..737a2213af 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -39,26 +39,24 @@ struct tgsi_token; struct svga_vs_compile_key { - ubyte need_prescale:1; - ubyte allow_psiz:1; unsigned zero_stride_vertex_elements; - ubyte num_zero_stride_vertex_elements:6; + unsigned need_prescale:1; + unsigned allow_psiz:1; + unsigned num_zero_stride_vertex_elements:6; }; struct svga_fs_compile_key { - boolean light_twoside:1; - boolean front_cw:1; - ubyte num_textures; - ubyte num_unnormalized_coords; + unsigned light_twoside:1; + unsigned front_cw:1; + unsigned num_textures:8; + unsigned num_unnormalized_coords:8; struct { - ubyte compare_mode : 1; - ubyte compare_func : 3; - ubyte unnormalized : 1; - - ubyte width_height_idx : 7; - - ubyte texture_target; + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned unnormalized:1; + unsigned width_height_idx:7; + unsigned texture_target:8; } tex[PIPE_MAX_SAMPLERS]; }; @@ -121,8 +119,7 @@ static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key ) static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key ) { - return (const char *)&key->tex[key->num_textures].texture_target - - (const char *)key; + return (const char *)&key->tex[key->num_textures] - (const char *)key; } struct svga_shader_result * diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 1670da8bfa..dc5eb8fc60 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ 
b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_I2F: case TGSI_OPCODE_NOT: case TGSI_OPCODE_SHL: - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: case TGSI_OPCODE_XOR: return FALSE; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index ad47a56fba..075e4f9a0b 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -161,16 +161,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag) pipe_mutex_unlock(tr_ctx->draw_mutex); } -static INLINE boolean +static INLINE void trace_context_draw_arrays(struct pipe_context *_pipe, unsigned mode, unsigned start, unsigned count) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -181,19 +180,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_arrays(pipe, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_arrays(pipe, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -203,10 +198,9 @@ trace_context_draw_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -221,19 +215,15 @@ trace_context_draw_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); 
trace_dump_arg(uint, count); - result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } -static INLINE boolean +static INLINE void trace_context_draw_range_elements(struct pipe_context *_pipe, struct pipe_buffer *_indexBuffer, unsigned indexSize, @@ -247,10 +237,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, struct trace_buffer *tr_buf = trace_buffer(_indexBuffer); struct pipe_context *pipe = tr_ctx->pipe; struct pipe_buffer *indexBuffer = tr_buf->buffer; - boolean result; if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled) - return 0; + return; trace_context_draw_block(tr_ctx, 1); @@ -267,18 +256,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe, trace_dump_arg(uint, start); trace_dump_arg(uint, count); - result = pipe->draw_range_elements(pipe, - indexBuffer, - indexSize, minIndex, maxIndex, - mode, start, count); - - trace_dump_ret(bool, result); + pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); trace_dump_call_end(); trace_context_draw_block(tr_ctx, 2); - - return result; } diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 0102cc1876..86237e03bc 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -409,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state) trace_dump_member(uint, state, min_img_filter); trace_dump_member(uint, state, min_mip_filter); trace_dump_member(uint, state, mag_img_filter); - trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_mode); trace_dump_member(uint, state, compare_func); trace_dump_member(bool, state, normalized_coords); trace_dump_member(uint, state, 
prefilter); diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index 1c16042ee5..e2f981d051 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -32,7 +32,7 @@ struct tgsi_token; enum trace_shader_type { TRACE_SHADER_FRAGMENT = 0, TRACE_SHADER_VERTEX = 1, - TRACE_SHADER_GEOMETRY = 2, + TRACE_SHADER_GEOMETRY = 2 }; struct trace_shader diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index f7368bb95b..26a940593f 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -52,45 +52,15 @@ #endif /* _MSC_VER */ -#if defined(_MSC_VER) - -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -#ifndef __eglplatform_h_ -typedef __int32 int32_t; -#endif -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; - -#if defined(_WIN64) -typedef __int64 intptr_t; -typedef unsigned __int64 uintptr_t; -#else -typedef __int32 intptr_t; -typedef unsigned __int32 uintptr_t; -#endif - -#define INT64_C(__val) __val##i64 -#define UINT64_C(__val) __val##ui64 - -#ifndef __cplusplus -#define false 0 -#define true 1 -#define bool _Bool -typedef int _Bool; -#define __bool_true_false_are_defined 1 -#endif /* !__cplusplus */ - -#else +/* + * Alternative stdint.h and stdbool.h headers are supplied in include/c99 for + * systems that lack it. 
+ */ #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS 1 #endif #include <stdint.h> #include <stdbool.h> -#endif #ifndef __HAIKU__ @@ -99,11 +69,7 @@ typedef unsigned short ushort; #endif typedef unsigned char ubyte; -#if 0 -#define boolean bool -#else typedef unsigned char boolean; -#endif #ifndef TRUE #define TRUE true #endif @@ -135,6 +101,17 @@ typedef unsigned char boolean; # endif #endif + +/* Function visibility */ +#ifndef PUBLIC +# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +# define PUBLIC __attribute__((visibility("default"))) +# else +# define PUBLIC +# endif +#endif + + /* The __FUNCTION__ gcc variable is generally only used for debugging. * If we're not using gcc, define __FUNCTION__ as a cpp symbol here. */ diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 6c06fb9027..d2f8085b42 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -61,29 +61,37 @@ struct pipe_context { * VBO drawing (return false on fallbacks (temporary??)) */ /*@{*/ - boolean (*draw_arrays)( struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); + void (*draw_arrays)( struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); - boolean (*draw_elements)( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); + void (*draw_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); /* XXX: this is (probably) a temporary entrypoint, as the range * information should be available from the vertex_buffer state. * Using this to quickly evaluate a specialized path in the draw * module. 
*/ - boolean (*draw_range_elements)( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); + void (*draw_range_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); /*@}*/ + /** + * Predicate subsequent rendering on occlusion query result + * \param query the query predicate, or NULL if no predicate + * \param mode one of PIPE_COND_RENDER_x + */ + void (*render_condition)( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ); /** * Query objects diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 2cda408fec..35f3830ebc 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -171,8 +171,6 @@ enum pipe_texture_target { */ #define PIPE_TEX_FILTER_NEAREST 0 #define PIPE_TEX_FILTER_LINEAR 1 -#define PIPE_TEX_FILTER_ANISO 2 - #define PIPE_TEX_COMPARE_NONE 0 #define PIPE_TEX_COMPARE_R_TO_TEXTURE 1 @@ -355,6 +353,15 @@ enum pipe_transfer_usage { /** + * Conditional rendering modes + */ +#define PIPE_RENDER_COND_WAIT 0 +#define PIPE_RENDER_COND_NO_WAIT 1 +#define PIPE_RENDER_COND_BY_REGION_WAIT 2 +#define PIPE_RENDER_COND_BY_REGION_NO_WAIT 3 + + +/** * Point sprite coord modes */ #define PIPE_SPRITE_COORD_NONE 0 diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index f0a4de5df3..b8e001a6b0 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -266,6 +266,11 @@ struct pipe_screen { void (*video_surface_destroy)( struct pipe_video_surface *vsfc ); + /** + * Do any special operations to ensure buffer size is correct + */ + void (*update_buffer)( struct pipe_screen *ws, + void *context_private ); /** * Do any special operations to ensure 
frontbuffer contents are diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 7b19364b97..550e2abc32 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -141,6 +141,8 @@ struct tgsi_declaration_semantic }; #define TGSI_IMM_FLOAT32 0 +#define TGSI_IMM_UINT32 1 +#define TGSI_IMM_INT32 2 struct tgsi_immediate { @@ -153,6 +155,8 @@ struct tgsi_immediate union tgsi_immediate_data { float Float; + unsigned Uint; + int Int; }; #define TGSI_PROPERTY_GS_INPUT_PRIM 0 @@ -264,7 +268,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_NOT 85 #define TGSI_OPCODE_TRUNC 86 #define TGSI_OPCODE_SHL 87 -#define TGSI_OPCODE_SHR 88 + /* gap */ #define TGSI_OPCODE_AND 89 #define TGSI_OPCODE_OR 90 #define TGSI_OPCODE_MOD 91 @@ -289,7 +293,33 @@ struct tgsi_property_data { #define TGSI_OPCODE_KIL 116 /* conditional kill */ #define TGSI_OPCODE_END 117 /* aka HALT */ /* gap */ -#define TGSI_OPCODE_LAST 119 +#define TGSI_OPCODE_F2I 119 +#define TGSI_OPCODE_IDIV 120 +#define TGSI_OPCODE_IMAX 121 +#define TGSI_OPCODE_IMIN 122 +#define TGSI_OPCODE_INEG 123 +#define TGSI_OPCODE_ISGE 124 +#define TGSI_OPCODE_ISHR 125 +#define TGSI_OPCODE_ISLT 126 +#define TGSI_OPCODE_F2U 127 +#define TGSI_OPCODE_U2F 128 +#define TGSI_OPCODE_UADD 129 +#define TGSI_OPCODE_UDIV 130 +#define TGSI_OPCODE_UMAD 131 +#define TGSI_OPCODE_UMAX 132 +#define TGSI_OPCODE_UMIN 133 +#define TGSI_OPCODE_UMOD 134 +#define TGSI_OPCODE_UMUL 135 +#define TGSI_OPCODE_USEQ 136 +#define TGSI_OPCODE_USGE 137 +#define TGSI_OPCODE_USHR 138 +#define TGSI_OPCODE_USLT 139 +#define TGSI_OPCODE_USNE 140 +#define TGSI_OPCODE_SWITCH 141 +#define TGSI_OPCODE_CASE 142 +#define TGSI_OPCODE_DEFAULT 143 +#define TGSI_OPCODE_ENDSWITCH 144 +#define TGSI_OPCODE_LAST 145 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ diff --git a/src/gallium/state_trackers/dri/dri_context.c 
b/src/gallium/state_trackers/dri/dri_context.c index 8819936fca..f2e5f3fb23 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -44,9 +44,9 @@ GLboolean dri_create_context(const __GLcontextModes * visual, - __DRIcontextPrivate * cPriv, void *sharedContextPrivate) + __DRIcontext * cPriv, void *sharedContextPrivate) { - __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; + __DRIscreen *sPriv = cPriv->driScreenPriv; struct dri_screen *screen = dri_screen(sPriv); struct dri_context *ctx = NULL; struct st_context *st_share = NULL; @@ -97,7 +97,7 @@ dri_create_context(const __GLcontextModes * visual, } void -dri_destroy_context(__DRIcontextPrivate * cPriv) +dri_destroy_context(__DRIcontext * cPriv) { struct dri_context *ctx = dri_context(cPriv); @@ -116,7 +116,7 @@ dri_destroy_context(__DRIcontextPrivate * cPriv) } GLboolean -dri_unbind_context(__DRIcontextPrivate * cPriv) +dri_unbind_context(__DRIcontext * cPriv) { if (cPriv) { struct dri_context *ctx = dri_context(cPriv); @@ -133,9 +133,9 @@ dri_unbind_context(__DRIcontextPrivate * cPriv) } GLboolean -dri_make_current(__DRIcontextPrivate * cPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) +dri_make_current(__DRIcontext * cPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { if (cPriv) { struct dri_context *ctx = dri_context(cPriv); diff --git a/src/gallium/state_trackers/dri/dri_context.h b/src/gallium/state_trackers/dri/dri_context.h index 4650178734..13f497462f 100644 --- a/src/gallium/state_trackers/dri/dri_context.h +++ b/src/gallium/state_trackers/dri/dri_context.h @@ -44,10 +44,10 @@ struct dri_drawable; struct dri_context { /* dri */ - __DRIscreenPrivate *sPriv; - __DRIcontextPrivate *cPriv; - __DRIdrawablePrivate *dPriv; - __DRIdrawablePrivate *rPriv; + __DRIscreen *sPriv; + __DRIcontext *cPriv; + __DRIdrawable *dPriv; + __DRIdrawable *rPriv; driOptionCache optionCache; @@ -67,7 +67,7 @@ struct 
dri_context }; static INLINE struct dri_context * -dri_context(__DRIcontextPrivate * driContextPriv) +dri_context(__DRIcontext * driContextPriv) { return (struct dri_context *)driContextPriv->driverPrivate; } @@ -99,18 +99,18 @@ dri_unlock(struct dri_context *ctx) */ extern struct dri1_api_lock_funcs dri1_lf; -void dri_destroy_context(__DRIcontextPrivate * driContextPriv); +void dri_destroy_context(__DRIcontext * driContextPriv); -boolean dri_unbind_context(__DRIcontextPrivate * driContextPriv); +boolean dri_unbind_context(__DRIcontext * driContextPriv); boolean -dri_make_current(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); +dri_make_current(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); boolean dri_create_context(const __GLcontextModes * visual, - __DRIcontextPrivate * driContextPriv, + __DRIcontext * driContextPriv, void *sharedContextPrivate); /*********************************************************************** diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index 4b12243ddf..f131e77ac5 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -118,7 +118,7 @@ dri2_check_if_pixmap(__DRIbuffer *buffers, int count) * This will be called a drawable is known to have been resized. 
*/ void -dri_get_buffers(__DRIdrawablePrivate * dPriv) +dri_get_buffers(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); @@ -268,6 +268,14 @@ void dri2_set_tex_buffer(__DRIcontext *pDRICtx, GLint target, } void +dri_update_buffer(struct pipe_screen *screen, void *context_private) +{ + struct dri_context *ctx = (struct dri_context *)context_private; + + dri_get_buffers(ctx->dPriv); +} + +void dri_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private) { @@ -299,8 +307,8 @@ dri_flush_frontbuffer(struct pipe_screen *screen, * This is called when we need to set up GL rendering to a new X window. */ boolean -dri_create_buffer(__DRIscreenPrivate * sPriv, - __DRIdrawablePrivate * dPriv, +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, const __GLcontextModes * visual, boolean isPixmap) { struct dri_screen *screen = sPriv->private; @@ -416,7 +424,7 @@ dri_swap_fences_push_back(struct dri_drawable *draw, } void -dri_destroy_buffer(__DRIdrawablePrivate * dPriv) +dri_destroy_buffer(__DRIdrawable * dPriv) { struct dri_drawable *drawable = dri_drawable(dPriv); struct pipe_fence_handle *fence; @@ -434,8 +442,8 @@ dri_destroy_buffer(__DRIdrawablePrivate * dPriv) static void dri1_update_drawables_locked(struct dri_context *ctx, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { if (ctx->stLostLock) { ctx->stLostLock = FALSE; @@ -458,8 +466,8 @@ dri1_update_drawables_locked(struct dri_context *ctx, static void dri1_propagate_drawable_change(struct dri_context *ctx) { - __DRIdrawablePrivate *dPriv = ctx->dPriv; - __DRIdrawablePrivate *rPriv = ctx->rPriv; + __DRIdrawable *dPriv = ctx->dPriv; + __DRIdrawable *rPriv = ctx->rPriv; boolean flushed = FALSE; if (dPriv && ctx->d_stamp != dPriv->lastStamp) { @@ -532,7 +540,7 @@ static void dri1_swap_copy(struct dri_context *ctx, struct pipe_surface *dst, struct 
pipe_surface *src, - __DRIdrawablePrivate * dPriv, const struct drm_clip_rect *bbox) + __DRIdrawable * dPriv, const struct drm_clip_rect *bbox) { struct pipe_context *pipe = ctx->pipe; struct drm_clip_rect clip; @@ -563,7 +571,7 @@ dri1_swap_copy(struct dri_context *ctx, static void dri1_copy_to_front(struct dri_context *ctx, struct pipe_surface *surf, - __DRIdrawablePrivate * dPriv, + __DRIdrawable * dPriv, const struct drm_clip_rect *sub_box, struct pipe_fence_handle **fence) { @@ -636,7 +644,7 @@ dri1_flush_frontbuffer(struct pipe_screen *screen, } void -dri_swap_buffers(__DRIdrawablePrivate * dPriv) +dri_swap_buffers(__DRIdrawable * dPriv) { struct dri_context *ctx; struct pipe_surface *back_surf; @@ -668,7 +676,7 @@ dri_swap_buffers(__DRIdrawablePrivate * dPriv) } void -dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) +dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h) { struct pipe_screen *screen = dri_screen(dPriv->driScreenPriv)->pipe_screen; struct drm_clip_rect sub_bbox; diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h index b910930db4..8bc59cb4c3 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.h +++ b/src/gallium/state_trackers/dri/dri_drawable.h @@ -41,8 +41,8 @@ struct dri_context; struct dri_drawable { /* dri */ - __DRIdrawablePrivate *dPriv; - __DRIscreenPrivate *sPriv; + __DRIdrawable *dPriv; + __DRIscreen *sPriv; unsigned attachments[8]; unsigned num_attachments; @@ -67,7 +67,7 @@ struct dri_drawable }; static INLINE struct dri_drawable * -dri_drawable(__DRIdrawablePrivate * driDrawPriv) +dri_drawable(__DRIdrawable * driDrawPriv) { return (struct dri_drawable *)driDrawPriv->driverPrivate; } @@ -76,22 +76,25 @@ dri_drawable(__DRIdrawablePrivate * driDrawPriv) * dri_drawable.c */ boolean -dri_create_buffer(__DRIscreenPrivate * sPriv, - __DRIdrawablePrivate * dPriv, +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, const 
__GLcontextModes * visual, boolean isPixmap); void +dri_update_buffer(struct pipe_screen *screen, void *context_private); + +void dri_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surf, void *context_private); -void dri_swap_buffers(__DRIdrawablePrivate * dPriv); +void dri_swap_buffers(__DRIdrawable * dPriv); void -dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h); +dri_copy_sub_buffer(__DRIdrawable * dPriv, int x, int y, int w, int h); -void dri_get_buffers(__DRIdrawablePrivate * dPriv); +void dri_get_buffers(__DRIdrawable * dPriv); -void dri_destroy_buffer(__DRIdrawablePrivate * dPriv); +void dri_destroy_buffer(__DRIdrawable * dPriv); void dri2_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv); diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index cb864d45d5..793db087ee 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -202,7 +202,7 @@ dri_fill_in_modes(struct dri_screen *screen, * Get information about previous buffer swaps. */ static int -dri_get_swap_info(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) +dri_get_swap_info(__DRIdrawable * dPriv, __DRIswapInfo * sInfo) { if (dPriv == NULL || dPriv->driverPrivate == NULL || sInfo == NULL) return -1; @@ -220,7 +220,7 @@ dri_copy_version(struct dri1_api_version *dst, } static const __DRIconfig ** -dri_init_screen(__DRIscreenPrivate * sPriv) +dri_init_screen(__DRIscreen * sPriv) { struct dri_screen *screen; const __DRIconfig **configs; @@ -285,7 +285,7 @@ dri_init_screen(__DRIscreenPrivate * sPriv) * Returns the __GLcontextModes supported by this driver. 
*/ static const __DRIconfig ** -dri_init_screen2(__DRIscreenPrivate * sPriv) +dri_init_screen2(__DRIscreen * sPriv) { struct dri_screen *screen; struct drm_create_screen_arg arg; @@ -308,6 +308,7 @@ dri_init_screen2(__DRIscreenPrivate * sPriv) } /* We need to hook in here */ + screen->pipe_screen->update_buffer = dri_update_buffer; screen->pipe_screen->flush_frontbuffer = dri_flush_frontbuffer; driParseOptionInfo(&screen->optionCache, @@ -319,7 +320,7 @@ dri_init_screen2(__DRIscreenPrivate * sPriv) } static void -dri_destroy_screen(__DRIscreenPrivate * sPriv) +dri_destroy_screen(__DRIscreen * sPriv) { struct dri_screen *screen = dri_screen(sPriv); @@ -346,4 +347,12 @@ PUBLIC const struct __DriverAPIRec driDriverAPI = { .InitScreen2 = dri_init_screen2, }; +/* This is the table of extensions that the loader will dlsym() for. */ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driLegacyExtension.base, + &driDRI2Extension.base, + NULL +}; + /* vim: set sw=3 ts=8 sts=3 expandtab: */ diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h index f6c56d0f0c..03387a0e81 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -42,7 +42,7 @@ struct dri_screen { /* dri */ - __DRIscreenPrivate *sPriv; + __DRIscreen *sPriv; /** * Configuration cache with default values for all contexts @@ -63,7 +63,7 @@ struct dri_screen /** cast wrapper */ static INLINE struct dri_screen * -dri_screen(__DRIscreenPrivate * sPriv) +dri_screen(__DRIscreen * sPriv) { return (struct dri_screen *)sPriv->private; } diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 228ac9a20e..3caf56e924 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -1007,7 +1007,7 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) } 
-XVisualInfo * +PUBLIC XVisualInfo * glXChooseVisual( Display *dpy, int screen, int *list ) { XMesaVisual xmvis; @@ -1029,7 +1029,7 @@ glXChooseVisual( Display *dpy, int screen, int *list ) } -GLXContext +PUBLIC GLXContext glXCreateContext( Display *dpy, XVisualInfo *visinfo, GLXContext share_list, Bool direct ) { @@ -1084,7 +1084,7 @@ static XMesaBuffer MakeCurrent_PrevReadBuffer = 0; /* GLX 1.3 and later */ -Bool +PUBLIC Bool glXMakeContextCurrent( Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx ) { @@ -1180,21 +1180,21 @@ glXMakeContextCurrent( Display *dpy, GLXDrawable draw, } -Bool +PUBLIC Bool glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx ) { return glXMakeContextCurrent( dpy, drawable, drawable, ctx ); } -GLXContext +PUBLIC GLXContext glXGetCurrentContext(void) { return GetCurrentContext(); } -Display * +PUBLIC Display * glXGetCurrentDisplay(void) { GLXContext glxCtx = glXGetCurrentContext(); @@ -1203,14 +1203,14 @@ glXGetCurrentDisplay(void) } -Display * +PUBLIC Display * glXGetCurrentDisplayEXT(void) { return glXGetCurrentDisplay(); } -GLXDrawable +PUBLIC GLXDrawable glXGetCurrentDrawable(void) { GLXContext gc = glXGetCurrentContext(); @@ -1218,7 +1218,7 @@ glXGetCurrentDrawable(void) } -GLXDrawable +PUBLIC GLXDrawable glXGetCurrentReadDrawable(void) { GLXContext gc = glXGetCurrentContext(); @@ -1226,14 +1226,14 @@ glXGetCurrentReadDrawable(void) } -GLXDrawable +PUBLIC GLXDrawable glXGetCurrentReadDrawableSGI(void) { return glXGetCurrentReadDrawable(); } -GLXPixmap +PUBLIC GLXPixmap glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap ) { XMesaVisual v; @@ -1258,7 +1258,7 @@ glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap ) /*** GLX_MESA_pixmap_colormap ***/ -GLXPixmap +PUBLIC GLXPixmap glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap ) { @@ -1282,7 +1282,7 @@ glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo, } -void +PUBLIC void 
glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ) { XMesaBuffer b = XMesaFindBuffer(dpy, pixmap); @@ -1295,7 +1295,7 @@ glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ) } -void +PUBLIC void glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, unsigned long mask ) { @@ -1309,7 +1309,7 @@ glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, } -Bool +PUBLIC Bool glXQueryExtension( Display *dpy, int *errorBase, int *eventBase ) { int op, ev, err; @@ -1324,7 +1324,7 @@ glXQueryExtension( Display *dpy, int *errorBase, int *eventBase ) } -void +PUBLIC void glXDestroyContext( Display *dpy, GLXContext ctx ) { GLXContext glxCtx = ctx; @@ -1340,7 +1340,7 @@ glXDestroyContext( Display *dpy, GLXContext ctx ) } -Bool +PUBLIC Bool glXIsDirect( Display *dpy, GLXContext ctx ) { GLXContext glxCtx = ctx; @@ -1350,7 +1350,7 @@ glXIsDirect( Display *dpy, GLXContext ctx ) -void +PUBLIC void glXSwapBuffers( Display *dpy, GLXDrawable drawable ) { XMesaBuffer buffer = XMesaFindBuffer( dpy, drawable ); @@ -1377,7 +1377,7 @@ glXSwapBuffers( Display *dpy, GLXDrawable drawable ) /*** GLX_MESA_copy_sub_buffer ***/ -void +PUBLIC void glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, int x, int y, int width, int height ) { @@ -1391,7 +1391,7 @@ glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, } -Bool +PUBLIC Bool glXQueryVersion( Display *dpy, int *maj, int *min ) { (void) dpy; @@ -1608,7 +1608,7 @@ get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig ) } -int +PUBLIC int glXGetConfig( Display *dpy, XVisualInfo *visinfo, int attrib, int *value ) { @@ -1638,7 +1638,7 @@ glXGetConfig( Display *dpy, XVisualInfo *visinfo, } -void +PUBLIC void glXWaitGL( void ) { XMesaContext xmesa = XMesaGetCurrentContext(); @@ -1647,7 +1647,7 @@ glXWaitGL( void ) -void +PUBLIC void glXWaitX( void ) { XMesaContext xmesa = XMesaGetCurrentContext(); @@ -1664,7 +1664,7 @@ get_extensions( void ) /* GLX 1.1 and later */ -const char * +PUBLIC const char * 
glXQueryExtensionsString( Display *dpy, int screen ) { (void) dpy; @@ -1675,7 +1675,7 @@ glXQueryExtensionsString( Display *dpy, int screen ) /* GLX 1.1 and later */ -const char * +PUBLIC const char * glXQueryServerString( Display *dpy, int screen, int name ) { static char version[1000]; @@ -1700,7 +1700,7 @@ glXQueryServerString( Display *dpy, int screen, int name ) /* GLX 1.1 and later */ -const char * +PUBLIC const char * glXGetClientString( Display *dpy, int name ) { static char version[1000]; @@ -1728,7 +1728,7 @@ glXGetClientString( Display *dpy, int name ) */ -int +PUBLIC int glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config, int attribute, int *value ) { @@ -1743,7 +1743,7 @@ glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config, } -GLXFBConfig * +PUBLIC GLXFBConfig * glXGetFBConfigs( Display *dpy, int screen, int *nelements ) { XVisualInfo *visuals, visTemplate; @@ -1769,7 +1769,7 @@ glXGetFBConfigs( Display *dpy, int screen, int *nelements ) } -GLXFBConfig * +PUBLIC GLXFBConfig * glXChooseFBConfig( Display *dpy, int screen, const int *attribList, int *nitems ) { @@ -1798,7 +1798,7 @@ glXChooseFBConfig( Display *dpy, int screen, } -XVisualInfo * +PUBLIC XVisualInfo * glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config ) { if (dpy && config) { @@ -1820,7 +1820,7 @@ glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config ) } -GLXWindow +PUBLIC GLXWindow glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, const int *attribList ) { @@ -1840,7 +1840,7 @@ glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, } -void +PUBLIC void glXDestroyWindow( Display *dpy, GLXWindow window ) { XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable) window); @@ -1851,7 +1851,7 @@ glXDestroyWindow( Display *dpy, GLXWindow window ) /* XXX untested */ -GLXPixmap +PUBLIC GLXPixmap glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList ) { @@ -1961,7 +1961,7 @@ glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap 
pixmap, } -void +PUBLIC void glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ) { XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable)pixmap); @@ -1971,7 +1971,7 @@ glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ) } -GLXPbuffer +PUBLIC GLXPbuffer glXCreatePbuffer( Display *dpy, GLXFBConfig config, const int *attribList ) { @@ -2034,7 +2034,7 @@ glXCreatePbuffer( Display *dpy, GLXFBConfig config, } -void +PUBLIC void glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf ) { XMesaBuffer b = XMesaFindBuffer(dpy, pbuf); @@ -2044,7 +2044,7 @@ glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf ) } -void +PUBLIC void glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute, unsigned int *value ) { @@ -2090,7 +2090,7 @@ glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute, } -GLXContext +PUBLIC GLXContext glXCreateNewContext( Display *dpy, GLXFBConfig config, int renderType, GLXContext shareList, Bool direct ) { @@ -2124,7 +2124,7 @@ glXCreateNewContext( Display *dpy, GLXFBConfig config, } -int +PUBLIC int glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value ) { GLXContext glxCtx = ctx; @@ -2153,7 +2153,7 @@ glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value ) } -void +PUBLIC void glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask ) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable); @@ -2162,7 +2162,7 @@ glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask ) } -void +PUBLIC void glXGetSelectedEvent( Display *dpy, GLXDrawable drawable, unsigned long *mask ) { @@ -2177,7 +2177,7 @@ glXGetSelectedEvent( Display *dpy, GLXDrawable drawable, /*** GLX_SGI_swap_control ***/ -int +PUBLIC int glXSwapIntervalSGI(int interval) { (void) interval; @@ -2190,7 +2190,7 @@ glXSwapIntervalSGI(int interval) static unsigned int FrameCounter = 0; -int +PUBLIC int glXGetVideoSyncSGI(unsigned int *count) { /* this is a bogus implementation */ @@ -2198,7 +2198,7 @@ glXGetVideoSyncSGI(unsigned int *count) 
return 0; } -int +PUBLIC int glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) { if (divisor <= 0 || remainder < 0) @@ -2215,7 +2215,7 @@ glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) /*** GLX_SGI_make_current_read ***/ -Bool +PUBLIC Bool glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx) { return glXMakeContextCurrent( dpy, draw, read, ctx ); @@ -2233,7 +2233,7 @@ glXGetCurrentReadDrawableSGI(void) /*** GLX_SGIX_video_source ***/ #if defined(_VL_H) -GLXVideoSourceSGIX +PUBLIC GLXVideoSourceSGIX glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode) { (void) dpy; @@ -2245,7 +2245,7 @@ glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath pa return 0; } -void +PUBLIC void glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src) { (void) dpy; @@ -2257,21 +2257,21 @@ glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src) /*** GLX_EXT_import_context ***/ -void +PUBLIC void glXFreeContextEXT(Display *dpy, GLXContext context) { (void) dpy; (void) context; } -GLXContextID +PUBLIC GLXContextID glXGetContextIDEXT(const GLXContext context) { (void) context; return 0; } -GLXContext +PUBLIC GLXContext glXImportContextEXT(Display *dpy, GLXContextID contextID) { (void) dpy; @@ -2279,7 +2279,7 @@ glXImportContextEXT(Display *dpy, GLXContextID contextID) return 0; } -int +PUBLIC int glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value) { (void) dpy; @@ -2293,20 +2293,20 @@ glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *val /*** GLX_SGIX_fbconfig ***/ -int +PUBLIC int glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value) { return glXGetFBConfigAttrib(dpy, config, attribute, value); } -GLXFBConfigSGIX * +PUBLIC GLXFBConfigSGIX * glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int 
*nelements) { return (GLXFBConfig *) glXChooseFBConfig(dpy, screen, attrib_list, nelements); } -GLXPixmap +PUBLIC GLXPixmap glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap) { XMesaVisual xmvis = (XMesaVisual) config; @@ -2315,7 +2315,7 @@ glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pi } -GLXContext +PUBLIC GLXContext glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct) { XMesaVisual xmvis = (XMesaVisual) config; @@ -2344,14 +2344,14 @@ glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_ } -XVisualInfo * +PUBLIC XVisualInfo * glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config) { return glXGetVisualFromFBConfig(dpy, config); } -GLXFBConfigSGIX +PUBLIC GLXFBConfigSGIX glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis) { XMesaVisual xmvis = find_glx_visual(dpy, vis); @@ -2367,7 +2367,7 @@ glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis) /*** GLX_SGIX_pbuffer ***/ -GLXPbufferSGIX +PUBLIC GLXPbufferSGIX glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attribList) @@ -2406,7 +2406,7 @@ glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, } -void +PUBLIC void glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf); @@ -2416,7 +2416,7 @@ glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf) } -int +PUBLIC int glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value) { const XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf); @@ -2449,7 +2449,7 @@ glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigne } -void +PUBLIC void glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable); @@ -2460,7 +2460,7 @@ 
glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask) } -void +PUBLIC void glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask) { XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable); @@ -2476,7 +2476,7 @@ glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask) /*** GLX_SGI_cushion ***/ -void +PUBLIC void glXCushionSGI(Display *dpy, Window win, float cushion) { (void) dpy; @@ -2488,7 +2488,7 @@ glXCushionSGI(Display *dpy, Window win, float cushion) /*** GLX_SGIX_video_resize ***/ -int +PUBLIC int glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window) { (void) dpy; @@ -2498,7 +2498,7 @@ glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window return 0; } -int +PUBLIC int glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h) { (void) dpy; @@ -2511,7 +2511,7 @@ glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, i return 0; } -int +PUBLIC int glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h) { (void) dpy; @@ -2524,7 +2524,7 @@ glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, i return 0; } -int +PUBLIC int glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh) { (void) dpy; @@ -2537,7 +2537,7 @@ glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *d return 0; } -int +PUBLIC int glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype) { (void) dpy; @@ -2552,7 +2552,7 @@ glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype) /*** GLX_SGIX_dmbuffer **/ #if defined(_DM_BUFFER_H_) -Bool +PUBLIC Bool glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer) { (void) dpy; @@ -2566,7 +2566,7 @@ glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams 
*params /*** GLX_SGIX_swap_group ***/ -void +PUBLIC void glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member) { (void) dpy; @@ -2578,7 +2578,7 @@ glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member) /*** GLX_SGIX_swap_barrier ***/ -void +PUBLIC void glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier) { (void) dpy; @@ -2586,7 +2586,7 @@ glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier) (void) barrier; } -Bool +PUBLIC Bool glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max) { (void) dpy; @@ -2599,7 +2599,7 @@ glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max) /*** GLX_SUN_get_transparent_index ***/ -Status +PUBLIC Status glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent) { (void) dpy; @@ -2617,7 +2617,7 @@ glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *p * Release the depth, stencil, accum buffers attached to a GLXDrawable * (a window or pixmap) prior to destroying the GLXDrawable. 
*/ -Bool +PUBLIC Bool glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ) { XMesaBuffer b = XMesaFindBuffer(dpy, d); @@ -2630,7 +2630,7 @@ glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ) /*** GLX_EXT_texture_from_pixmap ***/ -void +PUBLIC void glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list) { @@ -2639,7 +2639,7 @@ glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, XMesaBindTexImage(dpy, b, buffer, attrib_list); } -void +PUBLIC void glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer) { XMesaBuffer b = XMesaFindBuffer(dpy, drawable); diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index ca7d88c922..84d47b12ed 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -34,6 +34,7 @@ #include <string.h> #include "GL/glx.h" #include "glapi/glapi.h" +#include "pipe/p_compiler.h" struct name_address_pair { @@ -208,6 +209,7 @@ glXGetProcAddressARB(const GLubyte *procName) /* GLX 1.4 */ +PUBLIC void (*glXGetProcAddress(const GLubyte *procName))() { return glXGetProcAddressARB(procName); diff --git a/src/gallium/state_trackers/glx/xlib/glx_usefont.c b/src/gallium/state_trackers/glx/xlib/glx_usefont.c index acc64df62b..16e5ce642f 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_usefont.c +++ b/src/gallium/state_trackers/glx/xlib/glx_usefont.c @@ -33,6 +33,7 @@ #include "main/context.h" #include "main/imports.h" #include <GL/glx.h> +#include "pipe/p_compiler.h" /* Some debugging info. 
*/ @@ -210,7 +211,7 @@ isvalid(XFontStruct * fs, unsigned int which) } -void +PUBLIC void glXUseXFont(Font font, int first, int count, int listbase) { Display *dpy; diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript index d4fdd43688..8498a90812 100644 --- a/src/gallium/state_trackers/python/SConscript +++ b/src/gallium/state_trackers/python/SConscript @@ -28,14 +28,27 @@ if 'python' in env['statetrackers']: 'X11', ]) + sources = [ + 'gallium.i', + 'st_device.c', + 'st_sample.c', + ] + + drivers = [ + trace + ] + + if 'llvmpipe' in env['drivers']: + env.Tool('llvm') + sources += ['st_llvmpipe_winsys.c'] + drivers += [llvmpipe] + else: + sources += ['st_softpipe_winsys.c'] + drivers += [softpipe] + pyst = env.ConvenienceLibrary( target = 'pyst', - source = [ - 'gallium.i', - 'st_device.c', - 'st_sample.c', - 'st_softpipe_winsys.c', - ], + source = sources, ) env['no_import_lib'] = 1 @@ -45,5 +58,5 @@ if 'python' in env['statetrackers']: source = [ 'st_hardpipe_winsys.c', ], - LIBS = [pyst, softpipe, trace] + gallium + env['LIBS'], + LIBS = [pyst] + drivers + gallium + env['LIBS'], ) diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i index 2dc995adb0..0eba488a07 100644 --- a/src/gallium/state_trackers/python/p_device.i +++ b/src/gallium/state_trackers/python/p_device.i @@ -87,6 +87,10 @@ struct st_device { enum pipe_texture_target target, unsigned tex_usage, unsigned geom_flags ) { + /* We can't really display surfaces with the python statetracker so mask + * out that usage */ + tex_usage &= ~PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + return $self->screen->is_format_supported( $self->screen, format, target, @@ -110,6 +114,11 @@ struct st_device { unsigned tex_usage = 0 ) { struct pipe_texture templat; + + /* We can't really display surfaces with the python statetracker so mask + * out that usage */ + tex_usage &= ~PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + memset(&templat, 
0, sizeof(templat)); templat.format = format; templat.width0 = width; @@ -118,6 +127,7 @@ struct st_device { templat.last_level = last_level; templat.target = target; templat.tex_usage = tex_usage; + return $self->screen->texture_create($self->screen, &templat); } diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py index 1ceead5f17..a07cf557f2 100644 --- a/src/gallium/state_trackers/python/samples/gs.py +++ b/src/gallium/state_trackers/python/samples/gs.py @@ -136,10 +136,10 @@ def test(dev): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() zbuf = dev.texture_create( - PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, width, height, tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL, ).get_surface() diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index af80426dc6..e5e168bdc8 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -136,10 +136,10 @@ def test(dev): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() zbuf = dev.texture_create( - PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, width, height, tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL, ).get_surface() diff --git a/src/gallium/state_trackers/python/st_llvmpipe_winsys.c b/src/gallium/state_trackers/python/st_llvmpipe_winsys.c new file mode 100644 index 0000000000..0096b18c99 --- /dev/null +++ b/src/gallium/state_trackers/python/st_llvmpipe_winsys.c @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Llvmpipe support. 
+ * + * @author Jose Fonseca + */ + + +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "llvmpipe/lp_winsys.h" +#include "st_winsys.h" + + +static boolean +llvmpipe_ws_is_displaytarget_format_supported( struct llvmpipe_winsys *ws, + enum pipe_format format ) +{ + return FALSE; +} + + +static void * +llvmpipe_ws_displaytarget_map(struct llvmpipe_winsys *ws, + struct llvmpipe_displaytarget *dt, + unsigned flags ) +{ + assert(0); + return NULL; +} + + +static void +llvmpipe_ws_displaytarget_unmap(struct llvmpipe_winsys *ws, + struct llvmpipe_displaytarget *dt ) +{ + assert(0); +} + + +static void +llvmpipe_ws_displaytarget_destroy(struct llvmpipe_winsys *winsys, + struct llvmpipe_displaytarget *dt) +{ + assert(0); +} + + +static struct llvmpipe_displaytarget * +llvmpipe_ws_displaytarget_create(struct llvmpipe_winsys *winsys, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + return NULL; +} + + +static void +llvmpipe_ws_displaytarget_display(struct llvmpipe_winsys *winsys, + struct llvmpipe_displaytarget *dt, + void *context_private) +{ + assert(0); +} + + +static void +llvmpipe_ws_destroy(struct llvmpipe_winsys *winsys) +{ + FREE(winsys); +} + + +static struct pipe_screen * +st_llvmpipe_screen_create(void) +{ + static struct llvmpipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = CALLOC_STRUCT(llvmpipe_winsys); + if (!winsys) + goto no_winsys; + + winsys->destroy = llvmpipe_ws_destroy; + winsys->is_displaytarget_format_supported = llvmpipe_ws_is_displaytarget_format_supported; + winsys->displaytarget_create = llvmpipe_ws_displaytarget_create; + winsys->displaytarget_map = llvmpipe_ws_displaytarget_map; + winsys->displaytarget_unmap = llvmpipe_ws_displaytarget_unmap; + winsys->displaytarget_display = llvmpipe_ws_displaytarget_display; + winsys->displaytarget_destroy = 
llvmpipe_ws_displaytarget_destroy; + + screen = llvmpipe_create_screen(winsys); + if (!screen) + goto no_screen; + + return screen; + +no_screen: + FREE(winsys); +no_winsys: + return NULL; +} + + +static struct pipe_context * +st_llvmpipe_context_create(struct pipe_screen *screen) +{ + return llvmpipe_create(screen); +} + + +const struct st_winsys st_softpipe_winsys = { + &st_llvmpipe_screen_create, + &st_llvmpipe_context_create, +}; diff --git a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py index eed6cdd1e6..8d3bf9d4d7 100644 --- a/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/fragment-shader/fragment-shader.py @@ -114,7 +114,7 @@ def test(dev, name): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() fb = Framebuffer() fb.width = width diff --git a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py index 41bebd0604..01bf5a3210 100644 --- a/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py +++ b/src/gallium/state_trackers/python/tests/regress/vertex-shader/vertex-shader.py @@ -114,7 +114,7 @@ def test(dev, name): cbuf = dev.texture_create( PIPE_FORMAT_X8R8G8B8_UNORM, width, height, - tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + tex_usage=PIPE_TEXTURE_USAGE_RENDER_TARGET, ).get_surface() fb = Framebuffer() fb.width = width diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 278ba6d46e..1112ad9839 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -644,7 +644,7 @@ VGint 
image_sampler_filter(struct vg_context *ctx) return PIPE_TEX_FILTER_NEAREST; break; case VG_IMAGE_QUALITY_BETTER: - /*return PIPE_TEX_FILTER_ANISO;*/ + /* possibly use anisotropic filtering */ return PIPE_TEX_FILTER_LINEAR; break; default: diff --git a/src/gallium/state_trackers/vega/vg_tracker.h b/src/gallium/state_trackers/vega/vg_tracker.h index 5457631106..0f0c27f455 100644 --- a/src/gallium/state_trackers/vega/vg_tracker.h +++ b/src/gallium/state_trackers/vega/vg_tracker.h @@ -45,15 +45,19 @@ struct pipe_fence_handle; struct pipe_surface; +PUBLIC struct vg_context *st_create_context(struct pipe_context *pipe, const void *visual, struct vg_context *share); +PUBLIC void st_destroy_context( struct vg_context *st ); +PUBLIC void st_copy_context_state(struct vg_context *dst, struct vg_context *src, uint mask); +PUBLIC struct st_framebuffer *st_create_framebuffer(const void *visual, enum pipe_format colorFormat, enum pipe_format depthFormat, @@ -61,47 +65,63 @@ struct st_framebuffer *st_create_framebuffer(const void *visual, uint width, uint height, void *privateData); +PUBLIC void st_resize_framebuffer(struct st_framebuffer *stfb, uint width, uint height); +PUBLIC void st_set_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf); +PUBLIC void st_get_framebuffer_dimensions( struct st_framebuffer *stfb, uint *width, uint *height); +PUBLIC int st_bind_texture_surface(struct pipe_surface *ps, int target, int level, enum pipe_format format); +PUBLIC int st_unbind_texture_surface(struct pipe_surface *ps, int target, int level); +PUBLIC int st_get_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface **surf); +PUBLIC int st_get_framebuffer_texture(struct st_framebuffer *stfb, uint surfIndex, struct pipe_texture **tex); +PUBLIC void *st_framebuffer_private(struct st_framebuffer *stfb); +PUBLIC void st_unreference_framebuffer(struct st_framebuffer *stfb); +PUBLIC void st_make_current(struct vg_context *st, 
struct st_framebuffer *draw, struct st_framebuffer *read); +PUBLIC struct vg_context *st_get_current(void); +PUBLIC void st_flush(struct vg_context *st, uint pipeFlushFlags, struct pipe_fence_handle **fence); +PUBLIC void st_finish(struct vg_context *st); +PUBLIC void st_notify_swapbuffers(struct st_framebuffer *stfb); +PUBLIC void st_notify_swapbuffers_complete(struct st_framebuffer *stfb); /** Generic function type */ typedef void (*st_proc)(); +PUBLIC st_proc st_get_proc_address(const char *procname); #endif diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index e390ce29ae..650d2c0d1d 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -123,7 +123,8 @@ crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, drm_mode.vrefresh = mode->VRefresh; if (!mode->name) xf86SetModeDefaultName(mode); - strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN); + strncpy(drm_mode.name, mode->name, DRM_DISPLAY_MODE_LEN - 1); + drm_mode.name[DRM_DISPLAY_MODE_LEN - 1] = '\0'; ret = drmModeSetCrtc(ms->fd, drm_crtc->crtc_id, ms->fb_id, x, y, &drm_connector->connector_id, 1, &drm_mode); diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 4d169a1d14..b02fe68f31 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -181,8 +181,7 @@ drv_crtc_resize(ScrnInfoPtr pScrn, int width, int height) if (!pScreen->ModifyPixmapHeader(rootPixmap, width, height, -1, -1, -1, NULL)) return FALSE; - /* HW dependent - FIXME */ - pScrn->displayWidth = pScrn->virtualX; + pScrn->displayWidth = rootPixmap->devKind / (rootPixmap->drawable.bitsPerPixel / 8); /* now create new frontbuffer */ return ms->create_front_buffer(pScrn) && ms->bind_front_buffer(pScrn); diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index 
aa68570b9c..d9432babf1 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -811,34 +811,7 @@ xorg_exa_set_shared_usage(PixmapPtr pPixmap) return 0; } -unsigned -xorg_exa_get_pixmap_handle(PixmapPtr pPixmap, unsigned *stride_out) -{ - ScreenPtr pScreen = pPixmap->drawable.pScreen; - ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; - modesettingPtr ms = modesettingPTR(pScrn); - struct exa_pixmap_priv *priv; - unsigned handle; - unsigned stride; - if (!ms->exa) { - FatalError("NO MS->EXA\n"); - return 0; - } - - priv = exaGetPixmapDriverPrivate(pPixmap); - - if (!priv) { - FatalError("NO PIXMAP PRIVATE\n"); - return 0; - } - - ms->api->local_handle_from_texture(ms->api, ms->screen, priv->tex, &stride, &handle); - if (stride_out) - *stride_out = stride; - - return handle; -} static Bool size_match( int width, int tex_width ) diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index c0cfbe6061..4d5d4780dc 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -135,9 +135,6 @@ typedef struct _modesettingRec struct pipe_texture * xorg_exa_get_texture(PixmapPtr pPixmap); -unsigned -xorg_exa_get_pixmap_handle(PixmapPtr pPixmap, unsigned *stride); - int xorg_exa_set_displayed_usage(PixmapPtr pPixmap); diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 7106a06492..e5912ef77f 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -87,6 +87,7 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, case 0x60: init = nv40_screen_create; break; + case 0x50: case 0x80: case 0x90: case 0xa0: @@ -164,6 +165,7 @@ nouveau_drm_create_context(struct drm_api *api, struct pipe_screen *pscreen) case 0x60: init = nv40_create; break; + case 0x50: case 0x80: case 0x90: case 
0xa0: diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index d2367b245a..385fa857b5 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -146,16 +146,17 @@ static void *radeon_buffer_map(struct pipe_winsys *ws, (struct radeon_pipe_buffer*)buffer; int write = 0; - if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) { - priv->flush_cb(priv->flush_data); - } - if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { uint32_t domain; if (radeon_bo_is_busy(radeon_buffer->bo, &domain)) return NULL; } + + if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) { + priv->flush_cb(priv->flush_data); + } + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) { write = 1; } @@ -280,58 +281,3 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) return radeon_ws; } -#if 0 -static struct pipe_buffer *radeon_buffer_from_handle(struct radeon_screen *radeon_screen, - uint32_t handle) -{ - struct radeon_pipe_buffer *radeon_buffer; - struct radeon_bo *bo = NULL; - - bo = radeon_bo_open(radeon_screen->bom, handle, 0, 0, 0, 0); - if (bo == NULL) { - return NULL; - } - radeon_buffer = calloc(1, sizeof(struct radeon_pipe_buffer)); - if (radeon_buffer == NULL) { - radeon_bo_unref(bo); - return NULL; - } - pipe_reference_init(&radeon_buffer->base.reference, 1); - radeon_buffer->base.usage = PIPE_BUFFER_USAGE_PIXEL; - radeon_buffer->bo = bo; - return &radeon_buffer->base; -} - -struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context, - uint32_t handle, - enum pipe_format format, - int w, int h, int pitch) -{ - struct pipe_screen *pipe_screen = radeon_context->pipe_screen; - struct pipe_winsys *pipe_winsys = radeon_context->pipe_winsys; - struct pipe_texture tmpl; - struct pipe_surface *ps; - struct pipe_texture *pt; - struct pipe_buffer *pb; - - pb = radeon_buffer_from_handle(radeon_context->radeon_screen, handle); - if (pb == 
NULL) { - return NULL; - } - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; - tmpl.target = PIPE_TEXTURE_2D; - tmpl.width0 = w; - tmpl.height0 = h; - tmpl.depth0 = 1; - tmpl.format = format; - - pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb); - if (pt == NULL) { - pipe_buffer_reference(&pb, NULL); - } - ps = pipe_screen->get_tex_surface(pipe_screen, pt, 0, 0, 0, - PIPE_BUFFER_USAGE_GPU_WRITE); - return ps; -} -#endif diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript index f7ce400a7a..1e5d8ff7fe 100644 --- a/src/gallium/winsys/drm/vmware/xorg/SConscript +++ b/src/gallium/winsys/drm/vmware/xorg/SConscript @@ -44,6 +44,7 @@ if env['platform'] == 'linux': sources = [ 'vmw_ioctl.c', 'vmw_screen.c', + 'vmw_video.c', 'vmw_xorg.c', ] diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 599973ce12..420dccc92c 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -751,24 +751,18 @@ xlib_eglReleaseTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, static EGLBoolean xlib_eglSwapBuffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw) { - /* error checking step: */ - if (!_eglSwapBuffers(drv, dpy, draw)) - return EGL_FALSE; - - { - struct xlib_egl_surface *xsurf = lookup_surface(draw); - struct pipe_winsys *pws = xsurf->winsys; - struct pipe_surface *psurf; + struct xlib_egl_surface *xsurf = lookup_surface(draw); + struct pipe_winsys *pws = xsurf->winsys; + struct pipe_surface *psurf; - st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, - &psurf); + st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, + &psurf); - st_notify_swapbuffers(xsurf->Framebuffer); + st_notify_swapbuffers(xsurf->Framebuffer); - display_surface(pws, psurf, xsurf); + display_surface(pws, psurf, xsurf); - check_and_update_buffer_size(xsurf); - } + 
check_and_update_buffer_size(xsurf); return EGL_TRUE; } diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index 713841aeb1..a4dabb7804 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -3,50 +3,66 @@ Import('*') -if env['platform'] == 'linux' \ - and 'mesa' in env['statetrackers'] \ - and set(('softpipe', 'llvmpipe', 'i915', 'trace')).intersection(env['drivers']) \ - and not env['dri']: - - env = env.Clone() - - env.Append(CPPPATH = [ - '#/src/mesa', - '#/src/mesa/main', - '#src/gallium/state_trackers/glx/xlib', - ]) - - env.Append(CPPDEFINES = ['USE_XSHM']) - - sources = [ - 'xlib.c', - ] - - drivers = [trace] - - if 'softpipe' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') - sources += ['xlib_softpipe.c'] - drivers += [softpipe] - - if 'llvmpipe' in env['drivers']: - env.Tool('llvm') - if 'LLVM_VERSION' in env: - env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') - env.Tool('udis86') - sources += ['xlib_llvmpipe.c'] - drivers += [llvmpipe] - - if 'cell' in env['drivers']: - env.Append(CPPDEFINES = 'GALLIUM_CELL') - sources += ['xlib_cell.c'] - drivers += [cell] - - # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions - libgl = env.SharedLibrary( - target ='GL', - source = sources, - LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'], - ) +if env['platform'] != 'linux': + Return() +if 'mesa' not in env['statetrackers']: + print 'warning: Mesa state tracker disabled: skipping build of xlib libGL.so' + Return() + +if env['dri']: + print 'warning: DRI enabled: skipping build of xlib libGL.so' + Return() + +if 'trace' not in env['drivers']: + print 'warning: trace pipe driver disabled: skipping build of xlib libGL.so' + Return() + +if not set(('softpipe', 'llvmpipe', 'trace')).intersection(env['drivers']): + print 'warning: no supported pipe driver: skipping build of xlib libGL.so' + Return() + +env = env.Clone() + +env.Append(CPPPATH = [ 
+ '#/src/mesa', + '#/src/mesa/main', + '#src/gallium/state_trackers/glx/xlib', +]) + +env.Append(CPPDEFINES = ['USE_XSHM']) + +sources = [ + 'xlib.c', +] + +drivers = [trace] + +if 'softpipe' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') + sources += ['xlib_softpipe.c'] + drivers += [softpipe] + +if 'llvmpipe' in env['drivers']: + env.Tool('llvm') + if 'LLVM_VERSION' in env: + env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') + env.Tool('udis86') + sources += ['xlib_llvmpipe.c'] + drivers += [llvmpipe] + +if 'cell' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_CELL') + sources += ['xlib_cell.c'] + drivers += [cell] + +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions +libgl = env.SharedLibrary( + target ='GL', + source = sources, + LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'], +) + +if not env['dri']: + # Only install this libGL.so if DRI not enabled env.InstallSharedLibrary(libgl, version=(1, 5)) |