diff options
Diffstat (limited to 'src')
327 files changed, 9440 insertions, 11402 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 8f6ca15dfa..1c07ab1365 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -422,9 +422,9 @@ aaline_create_texture(struct aaline_stage *aaline) /* This texture is new, no need to flush. */ - transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0, + transfer = pipe->get_tex_transfer(pipe, aaline->texture, 0, level, 0, PIPE_TRANSFER_WRITE, 0, 0, size, size); - data = screen->transfer_map(screen, transfer); + data = pipe->transfer_map(pipe, transfer); if (data == NULL) return FALSE; @@ -448,8 +448,8 @@ aaline_create_texture(struct aaline_stage *aaline) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d0d99aa331..38c22bf4e9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -374,19 +374,21 @@ pstip_update_texture(struct pstip_stage *pstip) { static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *transfer; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; /* XXX: want to avoid flushing just because we use stipple: + * + * Flush should no longer be necessary if driver is properly + * interleaving drawing and transfers on a given context: */ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, 32, 32); - data = screen->transfer_map(screen, transfer); + transfer = pipe->get_tex_transfer(pipe, pstip->texture, 0, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + data = pipe->transfer_map(pipe, transfer); /* * Load alpha texture. @@ -408,8 +410,8 @@ pstip_update_texture(struct pstip_stage *pstip) } /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } diff --git a/src/gallium/auxiliary/os/os_llvm.h b/src/gallium/auxiliary/gallivm/lp_bld.h index d5edfbfe92..70a4960f91 100644 --- a/src/gallium/auxiliary/os/os_llvm.h +++ b/src/gallium/auxiliary/gallivm/lp_bld.h @@ -31,8 +31,8 @@ */ -#ifndef OS_LLVM_H -#define OS_LLVM_H +#ifndef LP_BLD_H +#define LP_BLD_H #include <llvm-c/Core.h> @@ -40,8 +40,8 @@ /** Set version to 0 if missing to avoid #ifdef HAVE_LLVM everywhere */ #ifndef HAVE_LLVM -#define HAVE_LLVM 0x0 +#define HAVE_LLVM 0x0207 #endif -#endif /* OS_LLVM_H */ +#endif /* LP_BLD_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h index fe3cedcc48..0f99fec65e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h @@ -35,7 +35,7 @@ #define LP_BLD_ALPHA_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct pipe_alpha_state; struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 90c84c6a4e..8e8fcccf56 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -361,12 +361,12 @@ lp_build_mul_u8n(LLVMBuilderRef builder, LLVMValueRef c8; LLVMValueRef ab; - c8 = lp_build_int_const_scalar(i16_type, 8); + c8 = lp_build_const_int_vec(i16_type, 8); #if 0 /* a*b/255 ~= (a*(b + 1)) >> 256 */ - b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), ""); + b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(i16_type, 1), ""); ab = LLVMBuildMul(builder, a, b, ""); #else @@ -374,7 +374,7 @@ lp_build_mul_u8n(LLVMBuilderRef builder, /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */ ab = LLVMBuildMul(builder, a, b, ""); ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), ""); - ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), ""); + ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(i16_type, 0x80), ""); #endif @@ -429,7 +429,7 @@ lp_build_mul(struct lp_build_context *bld, } if(type.fixed) - shift = lp_build_int_const_scalar(type, type.width/2); + shift = lp_build_const_int_vec(type, type.width/2); else shift = NULL; @@ -491,7 +491,7 @@ lp_build_mul_imm(struct lp_build_context *bld, * for Inf and NaN. */ unsigned mantissa = lp_mantissa(bld->type); - factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa); + factor = lp_build_const_int_vec(bld->type, (unsigned long long)shift << mantissa); a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), ""); a = LLVMBuildAdd(bld->builder, a, factor, ""); a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), ""); @@ -499,12 +499,12 @@ lp_build_mul_imm(struct lp_build_context *bld, #endif } else { - factor = lp_build_const_scalar(bld->type, shift); + factor = lp_build_const_vec(bld->type, shift); return LLVMBuildShl(bld->builder, a, factor, ""); } } - factor = lp_build_const_scalar(bld->type, (double)b); + factor = lp_build_const_vec(bld->type, (double)b); return lp_build_mul(bld, a, factor); } @@ -567,7 +567,7 @@ lp_build_lerp(struct lp_build_context *bld, * but it will be wrong for other uses. Basically we need a more * powerful lp_type, capable of further distinguishing the values * interpretation from the value storage. */ - res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), ""); + res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), ""); return res; } @@ -689,7 +689,7 @@ lp_build_abs(struct lp_build_context *bld, /* vector of floats */ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); unsigned long long absMask = ~(1ULL << (type.width - 1)); - LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask)); + LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask)); a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); a = LLVMBuildAnd(bld->builder, a, mask, ""); a = LLVMBuildBitCast(bld->builder, a, vec_type, ""); @@ -720,12 +720,12 @@ lp_build_negate(struct lp_build_context *bld, } +/** Return -1, 0 or +1 depending on the sign of a */ LLVMValueRef lp_build_sgn(struct lp_build_context *bld, LLVMValueRef a) { const struct lp_type type = bld->type; - LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMValueRef cond; LLVMValueRef res; @@ -735,27 +735,42 @@ lp_build_sgn(struct lp_build_context *bld, res = bld->one; } else if(type.floating) { - /* Take the sign bit and add it to 1 constant */ - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMTypeRef vec_type; + LLVMTypeRef int_type; + LLVMValueRef mask; LLVMValueRef sign; LLVMValueRef one; - sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); + unsigned long long maskBit = (unsigned long long)1 << (type.width - 1); + + if (type.length == 1) { + int_type = lp_build_int_elem_type(type); + vec_type = lp_build_elem_type(type); + mask = LLVMConstInt(int_type, maskBit, 0); + } + else { + /* vector */ + int_type = lp_build_int_vec_type(type); + vec_type = lp_build_vec_type(type); + mask = lp_build_const_int_vec(type, maskBit); + } + + /* Take the sign bit and add it to 1 constant */ + sign = LLVMBuildBitCast(bld->builder, a, int_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - one = LLVMConstBitCast(bld->one, int_vec_type); + one = LLVMConstBitCast(bld->one, int_type); res = LLVMBuildOr(bld->builder, sign, one, ""); res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); } else { - LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0); + LLVMValueRef minus_one = lp_build_const_vec(type, -1.0); cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero); res = lp_build_select(bld, cond, bld->one, minus_one); } /* Handle zero */ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero); - res = lp_build_select(bld, cond, bld->zero, bld->one); + res = lp_build_select(bld, cond, bld->zero, res); return res; } @@ -774,8 +789,8 @@ lp_build_set_sign(struct lp_build_context *bld, const struct lp_type type = bld->type; LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1); - LLVMValueRef mask = lp_build_int_const_scalar(type, + LLVMValueRef shift = lp_build_const_int_vec(type, type.width - 1); + LLVMValueRef mask = lp_build_const_int_vec(type, ~((unsigned long long) 1 << (type.width - 1))); LLVMValueRef val, res; @@ -915,7 +930,10 @@ lp_build_floor(struct lp_build_context *bld, assert(type.floating); if (type.length == 1) { - return LLVMBuildFPTrunc(bld->builder, a, LLVMFloatType(), ""); + LLVMValueRef res; + res = lp_build_ifloor(bld, a); + res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), ""); + return res; } if(util_cpu_caps.has_sse4_1) @@ -978,7 +996,7 @@ lp_build_itrunc(struct lp_build_context *bld, if (type.length == 1) { LLVMTypeRef int_type = LLVMIntType(type.width); - return LLVMBuildFPTrunc(bld->builder, a, int_type, ""); + return LLVMBuildFPToSI(bld->builder, a, int_type, ""); } else { LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); @@ -1016,7 +1034,7 @@ lp_build_iround(struct lp_build_context *bld, } else { LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef half; @@ -1025,7 +1043,7 @@ lp_build_iround(struct lp_build_context *bld, sign = LLVMBuildAnd(bld->builder, sign, mask, ""); /* sign * 0.5 */ - half = lp_build_const_scalar(type, 0.5); + half = lp_build_const_vec(type, 0.5); half = LLVMBuildBitCast(bld->builder, half, int_vec_type, ""); half = LLVMBuildOr(bld->builder, sign, half, ""); half = LLVMBuildBitCast(bld->builder, half, vec_type, ""); @@ -1068,18 +1086,18 @@ lp_build_ifloor(struct lp_build_context *bld, /* Take the sign bit and add it to 1 constant */ LLVMTypeRef vec_type = lp_build_vec_type(type); unsigned mantissa = lp_mantissa(type); - LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); LLVMValueRef sign; LLVMValueRef offset; /* sign = a < 0 ? ~0 : 0 */ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); sign = LLVMBuildAnd(bld->builder, sign, mask, ""); - sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), ""); + sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), ""); lp_build_name(sign, "floor.sign"); /* offset = -0.99999(9)f */ - offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); + offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa)); offset = LLVMConstBitCast(offset, int_vec_type); /* offset = a < 0 ? -0.99999(9)f : 0.0f */ @@ -1250,7 +1268,7 @@ lp_build_exp(struct lp_build_context *bld, LLVMValueRef x) { /* log2(e) = 1/log(2) */ - LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634); + LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634); return lp_build_mul(bld, log2e, lp_build_exp2(bld, x)); } @@ -1264,7 +1282,7 @@ lp_build_log(struct lp_build_context *bld, LLVMValueRef x) { /* log(2) */ - LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529); + LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529); return lp_build_mul(bld, log2, lp_build_exp2(bld, x)); } @@ -1300,7 +1318,7 @@ lp_build_polynomial(struct lp_build_context *bld, if (type.length == 1) coeff = LLVMConstReal(float_type, coeffs[i]); else - coeff = lp_build_const_scalar(type, coeffs[i]); + coeff = lp_build_const_vec(type, coeffs[i]); if(res) res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res)); @@ -1357,11 +1375,11 @@ lp_build_exp2_approx(struct lp_build_context *bld, assert(type.floating && type.width == 32); - x = lp_build_min(bld, x, lp_build_const_scalar(type, 129.0)); - x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999)); + x = lp_build_min(bld, x, lp_build_const_vec(type, 129.0)); + x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999)); /* ipart = int(x - 0.5) */ - ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), ""); + ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), ""); ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, ""); /* fpart = x - ipart */ @@ -1371,8 +1389,8 @@ lp_build_exp2_approx(struct lp_build_context *bld, if(p_exp2_int_part || p_exp2) { /* expipart = (float) (1 << ipart) */ - expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), ""); - expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), ""); + expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), ""); + expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), ""); expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, ""); } @@ -1438,8 +1456,8 @@ lp_build_log2_approx(struct lp_build_context *bld, LLVMTypeRef vec_type = lp_build_vec_type(type); LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000); - LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff); + LLVMValueRef expmask = lp_build_const_int_vec(type, 0x7f800000); + LLVMValueRef mantmask = lp_build_const_int_vec(type, 0x007fffff); LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type); LLVMValueRef i = NULL; @@ -1464,8 +1482,8 @@ lp_build_log2_approx(struct lp_build_context *bld, } if(p_floor_log2 || p_log2) { - logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), ""); - logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), ""); + logexp = LLVMBuildLShr(bld->builder, exp, lp_build_const_int_vec(type, 23), ""); + logexp = LLVMBuildSub(bld->builder, logexp, lp_build_const_int_vec(type, 127), ""); logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 7a10fe1220..31efa9921c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -37,7 +37,7 @@ #define LP_BLD_ARIT_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h index 5a9e1c1fb2..ebbdb1a604 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h @@ -40,7 +40,7 @@ * for a standalone example. */ -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c index 8a275fa72f..57843e9a60 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c @@ -263,7 +263,7 @@ lp_build_one(struct lp_type type) if(type.sign) /* TODO: Unfortunately this caused "Tried to create a shift operation * on a non-integer type!" */ - vec = LLVMConstLShr(vec, lp_build_int_const_scalar(type, 1)); + vec = LLVMConstLShr(vec, lp_build_const_int_vec(type, 1)); #endif return vec; @@ -283,8 +283,8 @@ lp_build_one(struct lp_type type) * Build constant-valued vector from a scalar value. */ LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val) +lp_build_const_vec(struct lp_type type, + double val) { LLVMTypeRef elem_type = lp_build_elem_type(type); LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; @@ -309,7 +309,7 @@ lp_build_const_scalar(struct lp_type type, LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, +lp_build_const_int_vec(struct lp_type type, long long val) { LLVMTypeRef elem_type = lp_build_int_elem_type(type); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h index 4078636103..9ca2f0664e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h @@ -37,9 +37,9 @@ #define LP_BLD_CONST_H -#include "os/os_llvm.h" +#include "pipe/p_compiler.h" +#include "gallivm/lp_bld.h" -#include <pipe/p_compiler.h> struct lp_type; @@ -85,13 +85,11 @@ lp_build_one(struct lp_type type); LLVMValueRef -lp_build_const_scalar(struct lp_type type, - double val); +lp_build_const_vec(struct lp_type type, double val); LLVMValueRef -lp_build_int_const_scalar(struct lp_type type, - long long val); +lp_build_const_int_vec(struct lp_type type, long long val); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index f77cf78721..3f7f2ebde9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -114,13 +114,13 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, scale = (double)mask/ubound; bias = (double)((unsigned long long)1 << (mantissa - n)); - res = LLVMBuildMul(builder, src, lp_build_const_scalar(src_type, scale), ""); - res = LLVMBuildAdd(builder, res, lp_build_const_scalar(src_type, bias), ""); + res = LLVMBuildMul(builder, src, lp_build_const_vec(src_type, scale), ""); + res = LLVMBuildAdd(builder, res, lp_build_const_vec(src_type, bias), ""); res = LLVMBuildBitCast(builder, res, int_vec_type, ""); if(dst_width > n) { int shift = dst_width - n; - res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), ""); + res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), ""); /* TODO: Fill in the empty lower bits for additional precision? */ /* YES: this fixes progs/trivial/tri-z-eq.c. @@ -130,21 +130,21 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder, #if 0 { LLVMValueRef msb; - msb = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, dst_width - 1), ""); - msb = LLVMBuildShl(builder, msb, lp_build_int_const_scalar(src_type, shift), ""); - msb = LLVMBuildSub(builder, msb, lp_build_int_const_scalar(src_type, 1), ""); + msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), ""); + msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), ""); + msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), ""); res = LLVMBuildOr(builder, res, msb, ""); } #elif 0 while(shift > 0) { - res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_int_const_scalar(src_type, n), ""), ""); + res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), ""); shift -= n; n *= 2; } #endif } else - res = LLVMBuildAnd(builder, res, lp_build_int_const_scalar(src_type, mask), ""); + res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), ""); return res; } @@ -183,10 +183,10 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, if(src_width > mantissa) { int shift = src_width - mantissa; - res = LLVMBuildLShr(builder, res, lp_build_int_const_scalar(dst_type, shift), ""); + res = LLVMBuildLShr(builder, res, lp_build_const_int_vec(dst_type, shift), ""); } - bias_ = lp_build_const_scalar(dst_type, bias); + bias_ = lp_build_const_vec(dst_type, bias); res = LLVMBuildOr(builder, res, @@ -195,7 +195,7 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder, res = LLVMBuildBitCast(builder, res, vec_type, ""); res = LLVMBuildSub(builder, res, bias_, ""); - res = LLVMBuildMul(builder, res, lp_build_const_scalar(dst_type, scale), ""); + res = LLVMBuildMul(builder, res, lp_build_const_vec(dst_type, scale), ""); return res; } @@ -251,7 +251,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_min == 0.0) thres = bld.zero; else - thres = lp_build_const_scalar(src_type, dst_min); + thres = lp_build_const_vec(src_type, dst_min); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_max(&bld, tmp[i], thres); } @@ -260,7 +260,7 @@ lp_build_conv(LLVMBuilderRef builder, if(dst_max == 1.0) thres = bld.one; else - thres = lp_build_const_scalar(src_type, dst_max); + thres = lp_build_const_vec(src_type, dst_max); for(i = 0; i < num_tmps; ++i) tmp[i] = lp_build_min(&bld, tmp[i], thres); } @@ -288,7 +288,7 @@ lp_build_conv(LLVMBuilderRef builder, LLVMTypeRef tmp_vec_type; if (dst_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, dst_scale); + LLVMValueRef scale = lp_build_const_vec(tmp_type, dst_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } @@ -315,7 +315,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift > dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, src_shift - dst_shift); + LLVMValueRef shift = lp_build_const_int_vec(tmp_type, src_shift - dst_shift); for(i = 0; i < num_tmps; ++i) if(src_type.sign) tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, ""); @@ -388,7 +388,7 @@ lp_build_conv(LLVMBuilderRef builder, } if (src_scale != 1.0) { - LLVMValueRef scale = lp_build_const_scalar(tmp_type, 1.0/src_scale); + LLVMValueRef scale = lp_build_const_vec(tmp_type, 1.0/src_scale); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildMul(builder, tmp[i], scale, ""); } @@ -400,7 +400,7 @@ lp_build_conv(LLVMBuilderRef builder, /* FIXME: compensate different offsets too */ if(src_shift < dst_shift) { - LLVMValueRef shift = lp_build_int_const_scalar(tmp_type, dst_shift - src_shift); + LLVMValueRef shift = lp_build_const_int_vec(tmp_type, dst_shift - src_shift); for(i = 0; i < num_tmps; ++i) tmp[i] = LLVMBuildShl(builder, tmp[i], shift, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index 78e8155ff7..628831c3ad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -37,7 +37,7 @@ #define LP_BLD_CONV_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index 441ad94786..7b010cbdb0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -30,7 +30,7 @@ #define LP_BLD_DEBUG_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_compiler.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.c b/src/gallium/auxiliary/gallivm/lp_bld_depth.c index f08f8eb6d8..cbc48f9865 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.c @@ -61,11 +61,104 @@ #include "util/u_format.h" #include "lp_bld_type.h" +#include "lp_bld_arit.h" #include "lp_bld_const.h" #include "lp_bld_logic.h" #include "lp_bld_flow.h" #include "lp_bld_debug.h" #include "lp_bld_depth.h" +#include "lp_bld_swizzle.h" + + + +/** + * Do the stencil test comparison (compare fb Z values against ref value. + * \param stencilVals vector of stencil values from framebuffer + * \param stencilRef the stencil reference value, replicated as a vector + * \return mask of pass/fail values + */ +static LLVMValueRef +lp_build_stencil_test(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilVals, + LLVMValueRef stencilRef) +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(stencil->enabled); + + if (stencil->valuemask != stencilMax) { + /* compute stencilRef = stencilRef & valuemask */ + LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); + stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); + /* compute stencilVals = stencilVals & valuemask */ + stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); + } + + res = lp_build_compare(bld->builder, bld->type, stencil->func, + stencilVals, stencilRef); + + return res; +} + + +/** + * Apply the stencil operator (add/sub/keep/etc) to the given vector + * of stencil values. + * \return new stencil values vector + */ +static LLVMValueRef +lp_build_stencil_op(struct lp_build_context *bld, + unsigned stencil_op, + LLVMValueRef stencilRef, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilVals) + +{ + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + LLVMValueRef max = lp_build_const_int_vec(type, stencilMax); + + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: + res = stencilVals; + case PIPE_STENCIL_OP_ZERO: + res = bld->zero; + case PIPE_STENCIL_OP_REPLACE: + res = lp_build_broadcast_scalar(bld, stencilRef); + case PIPE_STENCIL_OP_INCR: + res = lp_build_add(bld, stencilVals, bld->one); + res = lp_build_min(bld, res, max); + case PIPE_STENCIL_OP_DECR: + res = lp_build_sub(bld, stencilVals, bld->one); + res = lp_build_max(bld, res, bld->zero); + case PIPE_STENCIL_OP_INCR_WRAP: + res = lp_build_add(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + case PIPE_STENCIL_OP_DECR_WRAP: + res = lp_build_sub(bld, stencilVals, bld->one); + res = LLVMBuildAnd(bld->builder, res, max, ""); + case PIPE_STENCIL_OP_INVERT: + res = LLVMBuildNot(bld->builder, stencilVals, ""); + default: + assert(0 && "bad stencil op mode"); + res = NULL; + } + + if (stencil->writemask != stencilMax) { + /* compute res = (res & mask) | (stencilVals & ~mask) */ + LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask); + LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask"); + LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1"); + LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2"); + res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2"); + } + + return res; +} /** @@ -109,7 +202,14 @@ lp_depth_type(const struct util_format_description *format_desc, /** - * Depth test. + * Generate code for performing depth and/or stencil tests. + * We operate on a vector of values (typically a 2x2 quad). + * + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad + * \param src the incoming depth/stencil values (a 2x2 quad) + * \param dst_ptr the outgoing/updated depth/stencil values */ void lp_build_depth_test(LLVMBuilderRef builder, @@ -126,6 +226,9 @@ lp_build_depth_test(LLVMBuilderRef builder, LLVMValueRef z_bitmask = NULL; LLVMValueRef test; + (void) lp_build_stencil_test; + (void) lp_build_stencil_op; + if(!state->enabled) return; @@ -185,11 +288,11 @@ lp_build_depth_test(LLVMBuilderRef builder, if(padding_left || padding_right) { const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1; const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1; - z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right); + z_bitmask = lp_build_const_int_vec(type, mask_left ^ mask_right); } if(padding_left) - src = LLVMBuildLShr(builder, src, lp_build_int_const_scalar(type, padding_left), ""); + src = LLVMBuildLShr(builder, src, lp_build_const_int_vec(type, padding_left), ""); if(padding_right) src = LLVMBuildAnd(builder, src, z_bitmask, ""); if(padding_left || padding_right) @@ -198,6 +301,7 @@ lp_build_depth_test(LLVMBuilderRef builder, lp_build_name(dst, "zsbuf.z"); + /* compare src Z to dst Z, returning 'pass' mask */ test = lp_build_cmp(&bld, state->func, src, dst); lp_build_mask_update(mask, test); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h index 8be80024ae..8375824cbf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h @@ -36,7 +36,7 @@ #define LP_BLD_DEPTH_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct pipe_depth_state; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index c2f35419ec..106fc03e46 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -648,7 +648,9 @@ lp_build_if(struct lp_build_if_state *ctx, ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); /* add add the initial value of the var from the entry block */ - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1); + if (!LLVMIsUndef(*flow->variables[i])) + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], + &ifthen->entry_block, 1); } /* create/insert true_block before merge_block */ @@ -695,18 +697,21 @@ lp_build_endif(struct lp_build_if_state *ctx) { struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; + LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); unsigned i; ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); + /* Insert branch to the merge block from current block */ + LLVMBuildBr(ctx->builder, ifthen->merge_block); + if (ifthen->false_block) { LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); /* for each variable, update the Phi node with a (variable, block) pair */ for (i = 0; i < flow->num_variables; i++) { assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1); - + LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); /* replace the variable ref with the phi function */ *flow->variables[i] = ifthen->phi[i]; } @@ -742,15 +747,18 @@ lp_build_endif(struct lp_build_if_state *ctx) ifthen->true_block, ifthen->merge_block); } - /* Append an unconditional Br(anch) instruction on the true_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); - LLVMBuildBr(ctx->builder, ifthen->merge_block); + /* Insert branch from end of true_block to merge_block */ if (ifthen->false_block) { - /* Append an unconditional Br(anch) instruction on the false_block */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block); + /* Append an unconditional Br(anch) instruction on the true_block */ + LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); LLVMBuildBr(ctx->builder, ifthen->merge_block); } - + else { + /* No else clause. + * Note that we've already inserted the branch at the end of + * true_block. See the very first LLVMBuildBr() call in this function. + */ + } /* Resume building code at end of the ifthen->merge_block */ LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index e158836549..c2b50e1b60 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -35,7 +35,7 @@ #define LP_BLD_FLOW_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 8972c0dc17..73ab6de3f2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -34,7 +34,7 @@ * Pixel format helpers. */ -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_format.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index abb27e4c32..45ee4b12ce 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -114,10 +114,10 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder, case UTIL_FORMAT_TYPE_UNSIGNED: if(type.floating) { if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), ""); + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), ""); if(stop < format_desc->block.bits) { unsigned mask = ((unsigned long long)1 << width) - 1; - input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), ""); + input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), ""); } if(format_desc->channel[chan].normalized) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.c b/src/gallium/auxiliary/gallivm/lp_bld_interp.c index 2fc894017d..09efb16121 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.c @@ -289,17 +289,17 @@ pos_update(struct lp_build_interp_soa_context *bld, int quad_index) /* top-right or bottom-right quad in block */ /* build x += xstep */ x = lp_build_add(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); + lp_build_const_vec(bld->base.type, xstep)); } if (quad_index == 2) { /* bottom-left quad in block */ /* build y += ystep */ y = lp_build_add(&bld->base, y, - lp_build_const_scalar(bld->base.type, ystep)); + lp_build_const_vec(bld->base.type, ystep)); /* build x -= xstep */ x = lp_build_sub(&bld->base, x, - lp_build_const_scalar(bld->base.type, xstep)); + lp_build_const_vec(bld->base.type, xstep)); } lp_build_name(x, "pos.x"); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h index 177b5e943e..a4937bbb04 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h @@ -41,7 +41,7 @@ #define LP_BLD_INTERP_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index 7d5506c733..977f767322 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -37,7 +37,7 @@ #define LP_BLD_INTR_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" /** diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index f3df3dd138..e2e533fa7e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -193,7 +193,7 @@ lp_build_compare(LLVMBuilderRef builder, if(table[func].gt && ((type.width == 8 && type.sign) || (type.width != 8 && !type.sign))) { - LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); + LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); a = LLVMBuildXor(builder, a, msb, ""); b = LLVMBuildXor(builder, b, msb, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index b54ec13b70..d849d29e95 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -37,7 +37,7 @@ #define LP_BLD_LOGIC_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index 23398f41f9..2daa8a3b58 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -164,7 +164,7 @@ lp_build_unpack2(LLVMBuilderRef builder, if(dst_type.sign && src_type.sign) { /* Replicate the sign bit in the most significant bits */ - msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), ""); + msb = LLVMBuildAShr(builder, src, lp_build_const_int_vec(src_type, src_type.width - 1), ""); } else /* Most significant bits always zero */ @@ -361,7 +361,7 @@ lp_build_packs2(LLVMBuilderRef builder, if(clamp) { struct lp_build_context bld; unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width; - LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1); + LLVMValueRef dst_max = lp_build_const_int_vec(src_type, ((unsigned long long)1 << dst_bits) - 1); lp_build_context_init(&bld, builder, src_type); lo = lp_build_min(&bld, lo, dst_max); hi = lp_build_min(&bld, hi, dst_max); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index 346a17d580..41adeed220 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,7 +37,7 @@ #define LP_BLD_PACK_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 543fd5fea3..bb76ad4c6b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -84,8 +84,12 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->wrap_t = sampler->wrap_t; state->wrap_r = sampler->wrap_r; state->min_img_filter = sampler->min_img_filter; - state->min_mip_filter = sampler->min_mip_filter; state->mag_img_filter = sampler->mag_img_filter; + if (texture->last_level) { + state->min_mip_filter = sampler->min_mip_filter; + } else { + state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + } state->compare_mode = sampler->compare_mode; if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { @@ -169,7 +173,7 @@ lp_build_sample_offset(struct lp_build_context *bld, LLVMValueRef x_stride; LLVMValueRef offset; - x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8); + x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { LLVMValueRef x_lo, x_hi; @@ -191,9 +195,9 @@ lp_build_sample_offset(struct lp_build_context *bld, y_hi = LLVMBuildLShr(bld->builder, y, bld->one, ""); x_stride_lo = x_stride; - y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8); + y_stride_lo = lp_build_const_vec(bld->type, 2*format_desc->block.bits/8); - x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8); + x_stride_hi = lp_build_const_vec(bld->type, 4*format_desc->block.bits/8); y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, ""); x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 7f08bfaac1..92f3c57435 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,7 +36,7 @@ #define LP_BLD_SAMPLE_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct pipe_texture; struct pipe_sampler_state; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index a410688350..995c016b9d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -292,7 +292,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, int chan; for (chan = 0; chan < 4; chan++) { LLVMValueRef border_chan = - lp_build_const_scalar(bld->texel_type, + lp_build_const_vec(bld->texel_type, bld->static_state->border_color[chan]); texel[chan] = lp_build_select(&bld->texel_bld, use_border, border_chan, texel[chan]); @@ -457,8 +457,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); - LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); + LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0); + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); @@ -512,7 +512,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, else { LLVMValueRef min, max; /* clamp to [0.5, length - 0.5] */ - min = lp_build_const_scalar(coord_bld->type, 0.5F); + min = lp_build_const_vec(coord_bld->type, 0.5F); max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); } @@ -533,7 +533,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, if (bld->static_state->normalized_coords) { /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = 1.0 - min */ max = lp_build_sub(coord_bld, coord_bld->one, min); @@ -545,7 +545,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, } else { /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_scalar(coord_bld->type, -0.5F); + min = lp_build_const_vec(coord_bld->type, -0.5F); max = lp_build_sub(coord_bld, length_f, min); coord = lp_build_clamp(coord_bld, coord, min, max); coord = lp_build_sub(coord_bld, coord, half); @@ -620,7 +620,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef min, max; /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = 1.0 - min */ max = lp_build_sub(coord_bld, coord_bld->one, min); @@ -665,7 +665,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, struct lp_build_context *coord_bld = &bld->coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); + LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0); LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); @@ -708,7 +708,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } else { /* clamp to [0.5, length - 0.5] */ - min = lp_build_const_scalar(coord_bld->type, 0.5F); + min = lp_build_const_vec(coord_bld->type, 0.5F); max = lp_build_sub(coord_bld, length_f, min); } /* coord = clamp(coord, min, max) */ @@ -724,7 +724,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, if (bld->static_state->normalized_coords) { /* min = -1.0 / (2 * length) = -0.5 / length */ min = lp_build_mul(coord_bld, - lp_build_const_scalar(coord_bld->type, -0.5F), + lp_build_const_vec(coord_bld->type, -0.5F), lp_build_rcp(coord_bld, length_f)); /* max = length - min */ max = lp_build_sub(coord_bld, length_f, min); @@ -733,7 +733,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } else { /* clamp to [-0.5, length + 0.5] */ - min = lp_build_const_scalar(coord_bld->type, -0.5F); + min = lp_build_const_vec(coord_bld->type, -0.5F); max = lp_build_sub(coord_bld, length_f, min); } /* coord = clamp(coord, min, max) */ @@ -843,87 +843,98 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef depth) { - const int dims = texture_dims(bld->static_state->target); - struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); - LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); - LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod); - - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - - LLVMValueRef s0, s1, s2; - LLVMValueRef t0, t1, t2; - LLVMValueRef r0, r1, r2; - LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; - LLVMValueRef rho, lod; - - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - * XXX we're assuming a four-element quad in 2x2 layout here. - */ - s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); - s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); - s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); - dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); - t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); - t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); - dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); - r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); - r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); - drdx = LLVMBuildSub(bld->builder, r1, r0, ""); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildSub(bld->builder, r2, r0, ""); - drdy = lp_build_abs(float_bld, drdy); - } + if (bld->static_state->min_lod == bld->static_state->max_lod) { + /* User is forcing sampling from a particular mipmap level. + * This is hit during mipmap generation. + */ + return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); } + else { + const int dims = texture_dims(bld->static_state->target); + struct lp_build_context *float_bld = &bld->float_bld; + LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), + bld->static_state->lod_bias); + LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->min_lod); + LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), + bld->static_state->max_lod); + + LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + + LLVMValueRef s0, s1, s2; + LLVMValueRef t0, t1, t2; + LLVMValueRef r0, r1, r2; + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho, lod; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + * XXX we're assuming a four-element quad in 2x2 layout here. + */ + s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); + s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); + s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); + dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); + t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); + t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); + dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); + r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); + r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); + drdx = LLVMBuildSub(bld->builder, r1, r0, ""); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildSub(bld->builder, r2, r0, ""); + drdy = lp_build_abs(float_bld, drdy); + } + } - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } } - } - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); - /* add lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); + /* add lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); - /* clamp lod */ - lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); + /* clamp lod */ + lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); - return lod; + return lod; + } } @@ -986,7 +997,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, last_level); /* compute level 1 and clamp to legal range of levels */ *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); - *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero); + *level1_out = lp_build_min(int_bld, *level1_out, last_level); *weight_out = lp_build_fract(float_bld, lod); } @@ -994,6 +1005,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld, /** * Generate code to sample a mipmap level with nearest filtering. + * If sampling a cube texture, r = cube face in [0,5]. */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, @@ -1031,6 +1043,9 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->wrap_r); lp_build_name(z, "tex.z.wrapped"); } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z = r; + } else { z = NULL; } @@ -1051,7 +1066,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, /** * Generate code to sample a mipmap level with linear filtering. - * 1D, 2D and 3D images are suppored. + * If sampling a cube texture, r = cube face in [0,5]. */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, @@ -1098,8 +1113,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(z0, "tex.z0.wrapped"); lp_build_name(z1, "tex.z1.wrapped"); } + else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { + z0 = z1 = r; /* cube face */ + r_fpart = NULL; + } else { - z0 = z1 = r_fpart = NULL; + z0 = z1 = NULL; + r_fpart = NULL; } } else { @@ -1201,6 +1221,70 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } +/** Helper used by lp_build_cube_lookup() */ +static LLVMValueRef +lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) +{ + /* ima = -0.5 / abs(coord); */ + LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); + LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); + LLVMValueRef ima = lp_build_mul(coord_bld, negHalf, + lp_build_rcp(coord_bld, absCoord)); + return ima; +} + + +/** + * Helper used by lp_build_cube_lookup() + * \param sign scalar +1 or -1 + * \param coord float vector + * \param ima float vector + */ +static LLVMValueRef +lp_build_cube_coord(struct lp_build_context *coord_bld, + LLVMValueRef sign, int negate_coord, + LLVMValueRef coord, LLVMValueRef ima) +{ + /* return negate(coord) * ima * sign + 0.5; */ + LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); + LLVMValueRef res; + + assert(negate_coord == +1 || negate_coord == -1); + + if (negate_coord == -1) { + coord = lp_build_negate(coord_bld, coord); + } + + res = lp_build_mul(coord_bld, coord, ima); + if (sign) { + sign = lp_build_broadcast_scalar(coord_bld, sign); + res = lp_build_mul(coord_bld, res, sign); + } + res = lp_build_add(coord_bld, res, half); + + return res; +} + + +/** Helper used by lp_build_cube_lookup() + * Return (major_coord >= 0) ? pos_face : neg_face; + */ +static LLVMValueRef +lp_build_cube_face(struct lp_build_sample_context *bld, + LLVMValueRef major_coord, + unsigned pos_face, unsigned neg_face) +{ + LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + major_coord, + bld->float_bld.zero, ""); + LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); + LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); + LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); + return res; +} + + + /** * Generate code to do cube face selection and per-face texcoords. */ @@ -1213,11 +1297,10 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, LLVMValueRef *face_s, LLVMValueRef *face_t) { -#if 0 struct lp_build_context *float_bld = &bld->float_bld; + struct lp_build_context *coord_bld = &bld->coord_bld; LLVMValueRef rx, ry, rz; LLVMValueRef arx, ary, arz; - LLVMValueRef sc, tc, ma; LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); LLVMValueRef arx_ge_ary, arx_ge_arz; LLVMValueRef ary_ge_arx, ary_ge_arz; @@ -1257,34 +1340,165 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, { struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx, if2_ctx; + struct lp_build_if_state if_ctx; flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + + *face_s = bld->coord_bld.undef; + *face_t = bld->coord_bld.undef; + *face = bld->int_bld.undef; + + lp_build_name(*face_s, "face_s"); + lp_build_name(*face_t, "face_t"); + lp_build_name(*face, "face"); + + lp_build_flow_scope_declare(flow_ctx, face_s); + lp_build_flow_scope_declare(flow_ctx, face_t); + lp_build_flow_scope_declare(flow_ctx, face); lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); { -#if 0 - lp_build_if(&if2_ctx, flow_ctx, bld->builder, rx_pos); + /* +/- X face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rx); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); + *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); + *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + *face = lp_build_cube_face(bld, rx, + PIPE_TEX_FACE_POS_X, + PIPE_TEX_FACE_NEG_X); + } + lp_build_else(&if_ctx); + { + struct lp_build_flow_context *flow_ctx2; + struct lp_build_if_state if_ctx2; + + LLVMValueRef face_s2 = bld->coord_bld.undef; + LLVMValueRef face_t2 = bld->coord_bld.undef; + LLVMValueRef face2 = bld->int_bld.undef; + + flow_ctx2 = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx2); + lp_build_flow_scope_declare(flow_ctx2, &face_s2); + lp_build_flow_scope_declare(flow_ctx2, &face_t2); + lp_build_flow_scope_declare(flow_ctx2, &face2); + + ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); + + lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); { - /* Positive X face */ + /* +/- Y face */ + LLVMValueRef sign = lp_build_sgn(float_bld, ry); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); + face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); + face2 = lp_build_cube_face(bld, ry, + PIPE_TEX_FACE_POS_Y, + PIPE_TEX_FACE_NEG_Y); } - lp_build_else(&if2_ctx); + lp_build_else(&if_ctx2); { - /* Negative X face */ + /* +/- Z face */ + LLVMValueRef sign = lp_build_sgn(float_bld, rz); + LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); + face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); + face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); + face2 = lp_build_cube_face(bld, rz, + PIPE_TEX_FACE_POS_Z, + PIPE_TEX_FACE_NEG_Z); } - lp_build_endif(&if2_ctx); -#endif + lp_build_endif(&if_ctx2); + lp_build_flow_scope_end(flow_ctx2); + lp_build_flow_destroy(flow_ctx2); + + *face_s = face_s2; + *face_t = face_t2; + *face = face2; } - lp_build_else(&if_ctx); - { + lp_build_endif(&if_ctx); + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); + } +} + + + +/** + * Sample the texture/mipmap using given image filter and mip filter. + * data0_ptr and data1_ptr point to the two mipmap levels to sample + * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. + * If we're using nearest miplevel sampling the '1' values will be null/unused. + */ +static void +lp_build_sample_mipmap(struct lp_build_sample_context *bld, + unsigned img_filter, + unsigned mip_filter, + LLVMValueRef s, + LLVMValueRef t, + LLVMValueRef r, + LLVMValueRef lod_fpart, + LLVMValueRef width0_vec, + LLVMValueRef width1_vec, + LLVMValueRef height0_vec, + LLVMValueRef height1_vec, + LLVMValueRef depth0_vec, + LLVMValueRef depth1_vec, + LLVMValueRef row_stride0_vec, + LLVMValueRef row_stride1_vec, + LLVMValueRef img_stride0_vec, + LLVMValueRef img_stride1_vec, + LLVMValueRef data_ptr0, + LLVMValueRef data_ptr1, + LLVMValueRef *colors_out) +{ + LLVMValueRef colors0[4], colors1[4]; + int chan; + + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_nearest(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); } - lp_build_endif(&if_ctx); + } + else { + assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_flow_destroy(flow_ctx); + lp_build_sample_image_linear(bld, + width0_vec, height0_vec, depth0_vec, + row_stride0_vec, img_stride0_vec, + data_ptr0, s, t, r, colors0); + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* sample the second mipmap level, and interp */ + lp_build_sample_image_linear(bld, + width1_vec, height1_vec, depth1_vec, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, colors1); + } + } + + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { + /* interpolate samples from the two mipmap levels */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, + colors0[chan], colors1[chan]); + } + } + else { + /* use first/only level's colors */ + for (chan = 0; chan < 4; chan++) { + colors_out[chan] = colors0[chan]; + } } -#endif } @@ -1311,6 +1525,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef data_array, LLVMValueRef *colors_out) { + struct lp_build_context *float_bld = &bld->float_bld; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; @@ -1322,7 +1537,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; LLVMValueRef data_ptr0, data_ptr1; - int chan; /* printf("%s mip %d min %d mag %d\n", __FUNCTION__, @@ -1330,16 +1544,24 @@ lp_build_sample_general(struct lp_build_sample_context *bld, */ /* - * Compute the level of detail (mipmap level index(es)). + * Compute the level of detail (float). + */ + if (min_filter != mag_filter || + mip_filter != PIPE_TEX_MIPFILTER_NONE) { + /* Need to compute lod either to choose mipmap levels or to + * distinguish between minification/magnification with one mipmap level. + */ + lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); + } + + /* + * Compute integer mipmap level(s) to fetch texels from. */ if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* always use mip level 0 */ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); } else { - /* compute float LOD */ - lod = lp_build_lod_selector(bld, s, t, r, width, height, depth); - if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); } @@ -1366,33 +1588,41 @@ lp_build_sample_general(struct lp_build_sample_context *bld, height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, ilevel0); - if (dims == 3) { - depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { img_stride0_vec = lp_build_mul(&bld->int_coord_bld, row_stride0_vec, height0_vec); + if (dims == 3) { + depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); + } } } if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* compute width, height, depth for second mipmap level at ilevel1 */ + /* compute width, height, depth for second mipmap level at 'ilevel1' */ width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); if (dims >= 2) { height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, ilevel1); - if (dims == 3) { - depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); + if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { img_stride1_vec = lp_build_mul(&bld->int_coord_bld, row_stride1_vec, height1_vec); + if (dims ==3) { + depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); + } } } } /* - * Choose cube face, recompute texcoords. + * Choose cube face, recompute per-face texcoords. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef face, face_s, face_t; lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); + s = face_s; /* vec */ + t = face_t; /* vec */ + /* use 'r' to indicate cube face */ + r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ } /* @@ -1406,62 +1636,67 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* * Get/interpolate texture colors. */ - /* XXX temporarily force this path: */ - if (1 /*min_filter == mag_filter*/) { - /* same filter for minification or magnification */ - LLVMValueRef colors0[4], colors1[4]; - - if (min_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, - row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } - } - else { - assert(min_filter == PIPE_TEX_FILTER_LINEAR); + if (min_filter == mag_filter) { + /* no need to distinquish between minification and magnification */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); + } + else { + /* Emit conditional to choose min image filter or mag image filter + * depending on the lod being >0 or <= 0, respectively. + */ + struct lp_build_flow_context *flow_ctx; + struct lp_build_if_state if_ctx; + LLVMValueRef minify; - lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, - row_stride0_vec, img_stride0_vec, - data_ptr0, s, t, r, colors0); + flow_ctx = lp_build_flow_create(bld->builder); + lp_build_flow_scope_begin(flow_ctx); + lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); + lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level, and interp */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, colors1); - } - } + /* minify = lod > 0.0 */ + minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, + lod, float_bld->zero, ""); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, - colors0[chan], colors1[chan]); - } + lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); + { + /* Use the minification filter */ + lp_build_sample_mipmap(bld, min_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); } - else { - /* use first/only level's colors */ - for (chan = 0; chan < 4; chan++) { - colors_out[chan] = colors0[chan]; - } + lp_build_else(&if_ctx); + { + /* Use the magnification filter */ + lp_build_sample_mipmap(bld, mag_filter, mip_filter, + s, t, r, lod_fpart, + width0_vec, width1_vec, + height0_vec, height1_vec, + depth0_vec, depth1_vec, + row_stride0_vec, row_stride1_vec, + img_stride0_vec, img_stride1_vec, + data_ptr0, data_ptr1, + colors_out); } - } - else { - /* emit conditional to choose min image filter or mag image filter - * depending on the lod being >0 or <= 0, respectively. - */ - abort(); + lp_build_endif(&if_ctx); + + lp_build_flow_scope_end(flow_ctx); + lp_build_flow_destroy(flow_ctx); } } @@ -1473,7 +1708,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, LLVMValueRef packed, LLVMValueRef *rgba) { - LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff); + LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); unsigned chan; /* Decode the input vector components */ @@ -1485,7 +1720,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, input = packed; if(start) - input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), ""); + input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), ""); if(stop < 32) input = LLVMBuildAnd(builder, input, mask, ""); @@ -1547,17 +1782,17 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_int_const_scalar(i32.type, -128); + i32_c128 = lp_build_const_int_vec(i32.type, -128); s = LLVMBuildAdd(builder, s, i32_c128, ""); t = LLVMBuildAdd(builder, t, i32_c128, ""); /* compute floor (shift right 8) */ - i32_c8 = lp_build_int_const_scalar(i32.type, 8); + i32_c8 = lp_build_const_int_vec(i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_int_const_scalar(i32.type, 255); + i32_c255 = lp_build_const_int_vec(i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); @@ -1724,7 +1959,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, } assert(res); - res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25)); + res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25)); /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ for(chan = 0; chan < 3; ++chan) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h index 34478c10f5..147336edb4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h @@ -37,7 +37,7 @@ #define LP_BLD_STRUCT_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include <llvm-c/Target.h> #include "util/u_debug.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 64e81f7b1f..278c838eac 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -144,9 +144,9 @@ lp_build_broadcast_aos(struct lp_build_context *bld, #endif if(shift > 0) - tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), ""); + tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), ""); if(shift < 0) - tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), ""); + tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), ""); assert(tmp); if(tmp) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 57b5cc079f..138ca620e6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -37,7 +37,7 @@ #define LP_BLD_SWIZZLE_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_type; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 0f2f8a65b1..63b938bfa9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -35,7 +35,7 @@ #ifndef LP_BLD_TGSI_H #define LP_BLD_TGSI_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct tgsi_token; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 5ec59d636c..f160be878f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -475,7 +475,7 @@ emit_store( break; case TGSI_SAT_MINUS_PLUS_ONE: - value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); + value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); value = lp_build_min(&bld->base, value, bld->base.one); break; @@ -996,7 +996,7 @@ emit_instruction( src0 = emit_fetch( bld, inst, 0, chan_index ); src1 = emit_fetch( bld, inst, 1, chan_index ); src2 = emit_fetch( bld, inst, 2, chan_index ); - tmp1 = lp_build_const_scalar(bld->base.type, 0.5); + tmp1 = lp_build_const_vec(bld->base.type, 0.5); tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); } @@ -1713,7 +1713,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, assert(num_immediates < LP_MAX_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = - lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float); + lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); for( i = size; i < 4; ++i ) bld.immediates[num_immediates][i] = bld.base.undef; num_immediates++; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 5b351476ac..cd59d2faa6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -37,9 +37,9 @@ #define LP_BLD_TYPE_H -#include "os/os_llvm.h" +#include "pipe/p_compiler.h" +#include "gallivm/lp_bld.h" -#include <pipe/p_compiler.h> /** diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h index 5b55c1b374..7e0f67a76b 100644 --- a/src/gallium/auxiliary/os/os_time.h +++ b/src/gallium/auxiliary/os/os_time.h @@ -71,7 +71,7 @@ os_time_sleep(int64_t usecs); /* * Helper function for detecting time outs, taking in account overflow. * - * Returns true the the current time has elapsed beyond the specified interval. + * Returns true if the current time has elapsed beyond the specified interval. */ static INLINE boolean os_time_timeout(int64_t start, diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index c9ec2b32bf..c3ec9ae3f4 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -393,10 +393,10 @@ static fetch_func get_fetch_func( enum pipe_format format ) return &fetch_R8G8B8A8_SSCALED; case PIPE_FORMAT_B8G8R8A8_UNORM: - return &fetch_A8R8G8B8_UNORM; + return &fetch_B8G8R8A8_UNORM; case PIPE_FORMAT_A8R8G8B8_UNORM: - return &fetch_B8G8R8A8_UNORM; + return &fetch_A8R8G8B8_UNORM; case PIPE_FORMAT_R32_FIXED: return &fetch_R32_FIXED; @@ -552,10 +552,10 @@ static emit_func get_emit_func( enum pipe_format format ) return &emit_R8G8B8A8_SSCALED; case PIPE_FORMAT_B8G8R8A8_UNORM: - return &emit_A8R8G8B8_UNORM; + return &emit_B8G8R8A8_UNORM; case PIPE_FORMAT_A8R8G8B8_UNORM: - return &emit_B8G8R8A8_UNORM; + return &emit_A8R8G8B8_UNORM; default: assert(0); diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 03e093c11e..c13e742738 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -336,7 +336,7 @@ static boolean translate_attr( struct translate_sse *p, case PIPE_FORMAT_R32G32B32A32_FLOAT: emit_load_R32G32B32A32(p, dataXMM, srcECX); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); break; @@ -360,7 +360,7 @@ static boolean translate_attr( struct translate_sse *p, case PIPE_FORMAT_R32G32B32A32_FLOAT: emit_store_R32G32B32A32(p, dstEAX, dataXMM); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); break; diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 94be682c4b..e997cfa8a3 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -421,26 +421,31 @@ void debug_dump_image(const char *prefix, #endif } -void debug_dump_surface(const char *prefix, +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, struct pipe_surface *surface) { struct pipe_texture *texture; - struct pipe_screen *screen; struct pipe_transfer *transfer; void *data; if (!surface) return; + /* XXX: this doesn't necessarily work, as the driver may be using + * temporary storage for the surface which hasn't been propagated + * back into the texture. Need to nail down the semantics of views + * and transfers a bit better before we can say if extra work needs + * to be done here: + */ texture = surface->texture; - screen = texture->screen; - transfer = screen->get_tex_transfer(screen, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe->get_tex_transfer(pipe, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - data = screen->transfer_map(screen, transfer); + data = pipe->transfer_map(pipe, transfer); if(!data) goto error; @@ -452,13 +457,14 @@ void debug_dump_surface(const char *prefix, transfer->stride, data); - screen->transfer_unmap(screen, transfer); + pipe->transfer_unmap(pipe, transfer); error: - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); } -void debug_dump_texture(const char *prefix, +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, struct pipe_texture *texture) { struct pipe_surface *surface; @@ -473,7 +479,7 @@ void debug_dump_texture(const char *prefix, surface = screen->get_tex_surface(screen, texture, 0, 0, 0, PIPE_TEXTURE_USAGE_SAMPLER); if (surface) { - debug_dump_surface(prefix, surface); + debug_dump_surface(pipe, prefix, surface); screen->tex_surface_destroy(surface); } } @@ -511,27 +517,28 @@ struct bmp_rgb_quad { }; void -debug_dump_surface_bmp(const char *filename, +debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_surface *surface) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT struct pipe_transfer *transfer; struct pipe_texture *texture = surface->texture; - struct pipe_screen *screen = texture->screen; - transfer = screen->get_tex_transfer(screen, texture, surface->face, - surface->level, surface->zslice, - PIPE_TRANSFER_READ, 0, 0, surface->width, - surface->height); + transfer = pipe->get_tex_transfer(pipe, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - debug_dump_transfer_bmp(filename, transfer); + debug_dump_transfer_bmp(pipe, filename, transfer); - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); #endif } void -debug_dump_transfer_bmp(const char *filename, +debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_transfer *transfer) { #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT @@ -544,7 +551,7 @@ debug_dump_transfer_bmp(const char *filename, if(!rgba) goto error1; - pipe_get_tile_rgba(transfer, 0, 0, + pipe_get_tile_rgba(pipe, transfer, 0, 0, transfer->width, transfer->height, rgba); diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index 0f4768f344..98addeb372 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -312,6 +312,7 @@ debug_memory_end(unsigned long beginning); #ifdef DEBUG +struct pipe_context; struct pipe_surface; struct pipe_transfer; struct pipe_texture; @@ -321,21 +322,25 @@ void debug_dump_image(const char *prefix, unsigned width, unsigned height, unsigned stride, const void *data); -void debug_dump_surface(const char *prefix, +void debug_dump_surface(struct pipe_context *pipe, + const char *prefix, struct pipe_surface *surface); -void debug_dump_texture(const char *prefix, +void debug_dump_texture(struct pipe_context *pipe, + const char *prefix, struct pipe_texture *texture); -void debug_dump_surface_bmp(const char *filename, +void debug_dump_surface_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_surface *surface); -void debug_dump_transfer_bmp(const char *filename, +void debug_dump_transfer_bmp(struct pipe_context *pipe, + const char *filename, struct pipe_transfer *transfer); void debug_dump_float_rgba_bmp(const char *filename, unsigned width, unsigned height, float *rgba, unsigned stride); #else #define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) -#define debug_dump_surface(prefix, surface) ((void)0) -#define debug_dump_surface_bmp(filename, surface) ((void)0) +#define debug_dump_surface(pipe, prefix, surface) ((void)0) +#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0) #define debug_dump_transfer_bmp(filename, transfer) ((void)0) #define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0) #endif diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index d421bee8ef..5c51b53d7b 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1119,7 +1119,6 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, uint face, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; const uint zslice = 0; uint dstLevel; @@ -1128,27 +1127,27 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; void *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_1d(pt->format, srcTrans->width, srcMap, dstTrans->width, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } } @@ -1159,7 +1158,6 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, uint face, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; const uint zslice = 0; uint dstLevel; @@ -1171,17 +1169,17 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_2d(pt->format, srcTrans->width, srcTrans->height, @@ -1189,11 +1187,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstTrans->width, dstTrans->height, dstTrans->stride, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } } @@ -1216,17 +1214,17 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice, PIPE_TRANSFER_READ, 0, 0, u_minify(pt->width0, srcLevel), u_minify(pt->height0, srcLevel)); - dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice, PIPE_TRANSFER_WRITE, 0, 0, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); - dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans); reduce_3d(pt->format, srcTrans->width, srcTrans->height, @@ -1234,11 +1232,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, dstTrans->width, dstTrans->height, dstTrans->stride, dstMap); - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } #else (void) reduce_3d; diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 0cb3432c6e..e7255e3baa 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -264,24 +264,24 @@ pipe_buffer_read(struct pipe_screen *screen, } static INLINE void * -pipe_transfer_map( struct pipe_transfer *transf ) +pipe_transfer_map( struct pipe_context *context, + struct pipe_transfer *transf ) { - struct pipe_screen *screen = transf->texture->screen; - return screen->transfer_map(screen, transf); + return context->transfer_map(context, transf); } static INLINE void -pipe_transfer_unmap( struct pipe_transfer *transf ) +pipe_transfer_unmap( struct pipe_context *context, + struct pipe_transfer *transf ) { - struct pipe_screen *screen = transf->texture->screen; - screen->transfer_unmap(screen, transf); + context->transfer_unmap(context, transf); } static INLINE void -pipe_transfer_destroy( struct pipe_transfer *transf ) +pipe_transfer_destroy( struct pipe_context *context, + struct pipe_transfer *transfer ) { - struct pipe_screen *screen = transf->texture->screen; - screen->tex_transfer_destroy(transf); + context->tex_transfer_destroy(context, transfer); } static INLINE unsigned diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 8479161c74..e73797f1b7 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -169,7 +169,6 @@ util_surface_copy(struct pipe_context *pipe, unsigned src_x, unsigned src_y, unsigned w, unsigned h) { - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; @@ -182,7 +181,7 @@ util_surface_copy(struct pipe_context *pipe, src_format = src->texture->format; dst_format = dst->texture->format; - src_trans = screen->get_tex_transfer(screen, + src_trans = pipe->get_tex_transfer(pipe, src->texture, src->face, src->level, @@ -190,7 +189,7 @@ util_surface_copy(struct pipe_context *pipe, PIPE_TRANSFER_READ, src_x, src_y, w, h); - dst_trans = screen->get_tex_transfer(screen, + dst_trans = pipe->get_tex_transfer(pipe, dst->texture, dst->face, dst->level, @@ -202,8 +201,8 @@ util_surface_copy(struct pipe_context *pipe, assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); - src_map = pipe->screen->transfer_map(screen, src_trans); - dst_map = pipe->screen->transfer_map(screen, dst_trans); + src_map = pipe->transfer_map(pipe, src_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); assert(src_map); assert(dst_map); @@ -221,11 +220,11 @@ util_surface_copy(struct pipe_context *pipe, do_flip ? h - 1 : 0); } - pipe->screen->transfer_unmap(pipe->screen, src_trans); - pipe->screen->transfer_unmap(pipe->screen, dst_trans); + pipe->transfer_unmap(pipe, src_trans); + pipe->transfer_unmap(pipe, dst_trans); - screen->tex_transfer_destroy(src_trans); - screen->tex_transfer_destroy(dst_trans); + pipe->tex_transfer_destroy(pipe, src_trans); + pipe->tex_transfer_destroy(pipe, dst_trans); } @@ -243,14 +242,13 @@ util_surface_fill(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height, unsigned value) { - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *dst_trans; void *dst_map; assert(dst->texture); if (!dst->texture) return; - dst_trans = screen->get_tex_transfer(screen, + dst_trans = pipe->get_tex_transfer(pipe, dst->texture, dst->face, dst->level, @@ -258,7 +256,7 @@ util_surface_fill(struct pipe_context *pipe, PIPE_TRANSFER_WRITE, dstx, dsty, width, height); - dst_map = pipe->screen->transfer_map(screen, dst_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); assert(dst_map); @@ -302,6 +300,6 @@ util_surface_fill(struct pipe_context *pipe, } } - pipe->screen->transfer_unmap(pipe->screen, dst_trans); - screen->tex_transfer_destroy(dst_trans); + pipe->transfer_unmap(pipe, dst_trans); + pipe->tex_transfer_destroy(pipe, dst_trans); } diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 79481b710b..e445895efc 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -45,11 +45,11 @@ * Move raw block of pixels from transfer object to user memory. */ void -pipe_get_tile_raw(struct pipe_transfer *pt, +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *dst, int dst_stride) { - struct pipe_screen *screen = pt->texture->screen; const void *src; if (dst_stride == 0) @@ -58,14 +58,14 @@ pipe_get_tile_raw(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - src = screen->transfer_map(screen, pt); + src = pipe->transfer_map(pipe, pt); assert(src); if(!src) return; util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y); - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } @@ -73,11 +73,11 @@ pipe_get_tile_raw(struct pipe_transfer *pt, * Move raw block of pixels from user memory to transfer object. */ void -pipe_put_tile_raw(struct pipe_transfer *pt, +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *src, int src_stride) { - struct pipe_screen *screen = pt->texture->screen; void *dst; enum pipe_format format = pt->texture->format; @@ -87,14 +87,14 @@ pipe_put_tile_raw(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - dst = screen->transfer_map(screen, pt); + dst = pipe->transfer_map(pipe, pt); assert(dst); if(!dst) return; util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0); - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } @@ -1246,7 +1246,8 @@ pipe_tile_raw_to_rgba(enum pipe_format format, void -pipe_get_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p) { @@ -1265,7 +1266,7 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) assert((x & 1) == 0); - pipe_get_tile_raw(pt, x, y, w, h, packed, 0); + pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0); pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride); @@ -1274,7 +1275,8 @@ pipe_get_tile_rgba(struct pipe_transfer *pt, void -pipe_put_tile_rgba(struct pipe_transfer *pt, +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p) { @@ -1363,7 +1365,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, 0, 0, w, h); } - pipe_put_tile_raw(pt, x, y, w, h, packed, 0); + pipe_put_tile_raw(pipe, pt, x, y, w, h, packed, 0); FREE(packed); } @@ -1373,11 +1375,11 @@ pipe_put_tile_rgba(struct pipe_transfer *pt, * Get a block of Z values, converted to 32-bit range. */ void -pipe_get_tile_z(struct pipe_transfer *pt, +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z) { - struct pipe_screen *screen = pt->texture->screen; const uint dstStride = w; ubyte *map; uint *pDest = z; @@ -1387,7 +1389,7 @@ pipe_get_tile_z(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - map = (ubyte *)screen->transfer_map(screen, pt); + map = (ubyte *)pipe->transfer_map(pipe, pt); if (!map) { assert(0); return; @@ -1453,16 +1455,16 @@ pipe_get_tile_z(struct pipe_transfer *pt, assert(0); } - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } void -pipe_put_tile_z(struct pipe_transfer *pt, +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *zSrc) { - struct pipe_screen *screen = pt->texture->screen; const uint srcStride = w; const uint *ptrc = zSrc; ubyte *map; @@ -1472,7 +1474,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, if (pipe_clip_tile(x, y, &w, &h, pt)) return; - map = (ubyte *)screen->transfer_map(screen, pt); + map = (ubyte *)pipe->transfer_map(pipe, pt); if (!map) { assert(0); return; @@ -1560,7 +1562,7 @@ pipe_put_tile_z(struct pipe_transfer *pt, assert(0); } - screen->transfer_unmap(screen, pt); + pipe->transfer_unmap(pipe, pt); } diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h index 1453af38b8..8329087cfa 100644 --- a/src/gallium/auxiliary/util/u_tile.h +++ b/src/gallium/auxiliary/util/u_tile.h @@ -56,34 +56,40 @@ extern "C" { #endif void -pipe_get_tile_raw(struct pipe_transfer *pt, +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *p, int dst_stride); void -pipe_put_tile_raw(struct pipe_transfer *pt, +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *p, int src_stride); void -pipe_get_tile_rgba(struct pipe_transfer *pt, +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p); void -pipe_put_tile_rgba(struct pipe_transfer *pt, +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p); void -pipe_get_tile_z(struct pipe_transfer *pt, +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z); void -pipe_put_tile_z(struct pipe_transfer *pt, +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *z); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index 0763b5bb0e..beb4722901 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -680,14 +680,14 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r) assert(r); for (i = 0; i < 3; ++i) { - r->tex_transfer[i] = r->pipe->screen->get_tex_transfer + r->tex_transfer[i] = r->pipe->get_tex_transfer ( - r->pipe->screen, r->textures.all[i], + r->pipe, r->textures.all[i], 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, r->textures.all[i]->width0, r->textures.all[i]->height0 ); - r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]); + r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]); } } @@ -699,8 +699,8 @@ xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r) assert(r); for (i = 0; i < 3; ++i) { - r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]); - r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]); + r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]); + r->pipe->tex_transfer_destroy(r->pipe, r->tex_transfer[i]); } } diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index afcea616d5..f6cb1fc9be 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -158,6 +158,7 @@ cell_create_context(struct pipe_screen *screen, cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_vertex_functions(cell); + cell_init_texture_transfer_funcs(cell); cell->draw = cell_draw_create(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 576d514741..c54576b3c3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1352,7 +1352,7 @@ gen_stencil_values(struct spe_function *f, */ ASSERT(fbS_reg != newS_reg); - /* The code also assumes the the stencil_max_value is of the form + /* The code also assumes that the stencil_max_value is of the form * 2^n-1 and can therefore be used as a mask for the valid bits in * addition to a maximum. Make sure this is the case as well. * The clever math below exploits the fact that incrementing a diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 31fd963d19..f5528a7ec6 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -142,8 +142,10 @@ cell_is_format_supported( struct pipe_screen *screen, format == PIPE_FORMAT_A8B8G8R8_SRGB) return FALSE; - if (tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if (!winsys->is_displaytarget_format_supported(winsys, format)) + if (tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { + if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) return FALSE; } diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 28e5e6d706..39284f3a5d 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -135,7 +135,7 @@ cell_thread_function(void *arg) /** * Create the SPU threads. This is done once during driver initialization. - * This involves setting the the "init" message which is sent to each SPU. + * This involves setting the "init" message which is sent to each SPU. * The init message specifies an SPU id, total number of SPUs, location * and number of batch buffers, etc. */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index c8a1acd86a..5b169afaf8 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -105,6 +105,7 @@ cell_displaytarget_layout(struct pipe_screen *screen, /* Round up the surface size to a multiple of the tile size? */ ct->dt = winsys->displaytarget_create(winsys, + ct->base->tex_usage, ct->base.format, ct->base.width0, ct->base.height0, @@ -355,7 +356,7 @@ cell_tex_surface_destroy(struct pipe_surface *surf) * back out for glGetTexImage). */ static struct pipe_transfer * -cell_get_tex_transfer(struct pipe_screen *screen, +cell_get_tex_transfer(struct pipe_context *ctx, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, @@ -402,7 +403,7 @@ cell_get_tex_transfer(struct pipe_screen *screen, static void -cell_tex_transfer_destroy(struct pipe_transfer *t) +cell_tex_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t) { struct cell_transfer *transfer = cell_transfer(t); /* Effectively do the texture_update work here - if texture images @@ -419,7 +420,7 @@ cell_tex_transfer_destroy(struct pipe_transfer *t) * Return pointer to texture image data in linear layout. */ static void * -cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) +cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct cell_transfer *ctrans = cell_transfer(transfer); struct pipe_texture *pt = transfer->texture; @@ -471,7 +472,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) * to tiled data. */ static void -cell_transfer_unmap(struct pipe_screen *screen, +cell_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct cell_transfer *ctrans = cell_transfer(transfer); @@ -560,11 +561,14 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen) screen->get_tex_surface = cell_get_tex_surface; screen->tex_surface_destroy = cell_tex_surface_destroy; - screen->get_tex_transfer = cell_get_tex_transfer; - screen->tex_transfer_destroy = cell_tex_transfer_destroy; - - screen->transfer_map = cell_transfer_map; - screen->transfer_unmap = cell_transfer_unmap; - screen->flush_frontbuffer = cell_flush_frontbuffer; } + +void +cell_init_texture_transfer_funcs(struct cell_context *cell) +{ + cell->pipe.get_tex_transfer = cell_get_tex_transfer; + cell->pipe.tex_transfer_destroy = cell_tex_transfer_destroy; + cell->pipe.transfer_map = cell_transfer_map; + cell->pipe.transfer_unmap = cell_transfer_unmap; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 2be0579312..ac0b916775 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -95,5 +95,7 @@ cell_transfer(struct pipe_transfer *pt) extern void cell_init_screen_texture_funcs(struct pipe_screen *screen); +extern void +cell_init_texture_transfer_funcs(struct cell_context *cell); #endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 3d45a22b7e..130519ffa5 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -221,6 +221,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915_init_surface_functions(i915); i915_init_state_functions(i915); i915_init_flush_functions(i915); + i915_init_texture_functions(i915); draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 4994537683..039165b63f 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -350,6 +350,12 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen, /*********************************************************************** + * i915_texture.c + */ +void i915_init_texture_functions(struct i915_context *i915 ); + + +/*********************************************************************** * Inline conversion functions. These are better-typed than the * macros used previously: */ diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index 3ce52cdcdb..b252fb5330 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -795,12 +795,12 @@ i915_tex_surface_destroy(struct pipe_surface *surf) /* - * Screen transfer functions + * Texture transfer functions */ -static struct pipe_transfer* -i915_get_tex_transfer(struct pipe_screen *screen, +static struct pipe_transfer * +i915_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, @@ -837,7 +837,7 @@ i915_get_tex_transfer(struct pipe_screen *screen, } static void * -i915_transfer_map(struct pipe_screen *screen, +i915_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct i915_texture *tex = (struct i915_texture *)transfer->texture; @@ -859,7 +859,7 @@ i915_transfer_map(struct pipe_screen *screen, } static void -i915_transfer_unmap(struct pipe_screen *screen, +i915_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct i915_texture *tex = (struct i915_texture *)transfer->texture; @@ -868,7 +868,8 @@ i915_transfer_unmap(struct pipe_screen *screen, } static void -i915_tex_transfer_destroy(struct pipe_transfer *trans) +i915_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *trans) { pipe_texture_reference(&trans->texture, NULL); FREE(trans); @@ -879,6 +880,14 @@ i915_tex_transfer_destroy(struct pipe_transfer *trans) * Other texture functions */ +void +i915_init_texture_functions(struct i915_context *i915 ) +{ + i915->base.get_tex_transfer = i915_get_tex_transfer; + i915->base.transfer_map = i915_transfer_map; + i915->base.transfer_unmap = i915_transfer_unmap; + i915->base.tex_transfer_destroy = i915_tex_transfer_destroy; +} void i915_init_screen_texture_functions(struct i915_screen *is) @@ -889,8 +898,4 @@ i915_init_screen_texture_functions(struct i915_screen *is) is->base.texture_destroy = i915_texture_destroy; is->base.get_tex_surface = i915_get_tex_surface; is->base.tex_surface_destroy = i915_tex_surface_destroy; - is->base.get_tex_transfer = i915_get_tex_transfer; - is->base.transfer_map = i915_transfer_map; - is->base.transfer_unmap = i915_transfer_unmap; - is->base.tex_transfer_destroy = i915_tex_transfer_destroy; } diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 3dbe2b9130..4bcdcdd17e 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -118,6 +118,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen, brw->sws = brw_screen(screen)->sws; brw->chipset = brw_screen(screen)->chipset; + brw_tex_init( brw ); brw_pipe_blend_init( brw ); brw_pipe_depth_stencil_init( brw ); brw_pipe_framebuffer_init( brw ); diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index e0f3cd2a9f..e3a7c64d48 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -181,6 +181,10 @@ void brw_update_texture( struct brw_screen *brw_screen, struct brw_texture *tex ); +/* brw_screen_texture.h + */ +struct brw_context; +void brw_tex_init( struct brw_context *brw ); void brw_screen_tex_init( struct brw_screen *brw_screen ); void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index cc79bfc771..cadcb7cee2 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -37,6 +37,8 @@ #include "brw_defines.h" #include "brw_structs.h" #include "brw_winsys.h" +#include "brw_context.h" + @@ -479,7 +481,7 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, */ static struct pipe_transfer* -brw_get_tex_transfer(struct pipe_screen *screen, +brw_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, @@ -514,11 +516,11 @@ brw_get_tex_transfer(struct pipe_screen *screen, } static void * -brw_transfer_map(struct pipe_screen *screen, +brw_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct brw_texture *tex = brw_texture(transfer->texture); - struct brw_winsys_screen *sws = brw_screen(screen)->sws; + struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws; char *map; unsigned usage = transfer->usage; @@ -541,23 +543,32 @@ brw_transfer_map(struct pipe_screen *screen, } static void -brw_transfer_unmap(struct pipe_screen *screen, +brw_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct brw_texture *tex = brw_texture(transfer->texture); - struct brw_winsys_screen *sws = brw_screen(screen)->sws; + struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws; sws->bo_unmap(tex->bo); } static void -brw_tex_transfer_destroy(struct pipe_transfer *trans) +brw_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *trans) { pipe_texture_reference(&trans->texture, NULL); FREE(trans); } +void brw_tex_init( struct brw_context *brw ) +{ + brw->base.get_tex_transfer = brw_get_tex_transfer; + brw->base.transfer_map = brw_transfer_map; + brw->base.transfer_unmap = brw_transfer_unmap; + brw->base.tex_transfer_destroy = brw_tex_transfer_destroy; +} + void brw_screen_tex_init( struct brw_screen *brw_screen ) { brw_screen->base.is_format_supported = brw_is_format_supported; @@ -565,8 +576,4 @@ void brw_screen_tex_init( struct brw_screen *brw_screen ) brw_screen->base.texture_from_handle = brw_texture_from_handle; brw_screen->base.texture_get_handle = brw_texture_get_handle; brw_screen->base.texture_destroy = brw_texture_destroy; - brw_screen->base.get_tex_transfer = brw_get_tex_transfer; - brw_screen->base.transfer_map = brw_transfer_map; - brw_screen->base.transfer_unmap = brw_transfer_unmap; - brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index baf0ae4401..26770d6b1e 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -711,6 +711,76 @@ identity_is_buffer_referenced(struct pipe_context *_pipe, buffer); } + + +static struct pipe_transfer * +identity_context_get_tex_transfer(struct pipe_context *_context, + struct pipe_texture *_texture, + unsigned face, + unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, + unsigned y, + unsigned w, + unsigned h) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_texture *id_texture = identity_texture(_texture); + struct pipe_context *context = id_context->pipe; + struct pipe_texture *texture = id_texture->texture; + struct pipe_transfer *result; + + result = context->get_tex_transfer(context, + texture, + face, + level, + zslice, + usage, + x, + y, + w, + h); + + if (result) + return identity_transfer_create(id_context, id_texture, result); + return NULL; +} + +static void +identity_context_tex_transfer_destroy(struct pipe_context *_pipe, + struct pipe_transfer *_transfer) +{ + identity_transfer_destroy(identity_context(_pipe), + identity_transfer(_transfer)); +} + +static void * +identity_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_transfer *id_transfer = identity_transfer(_transfer); + struct pipe_context *context = id_context->pipe; + struct pipe_transfer *transfer = id_transfer->transfer; + + return context->transfer_map(context, + transfer); +} + +static void +identity_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_transfer *id_transfer = identity_transfer(_transfer); + struct pipe_context *context = id_context->pipe; + struct pipe_transfer *transfer = id_transfer->transfer; + + context->transfer_unmap(context, + transfer); +} + struct pipe_context * identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -775,6 +845,10 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.flush = identity_flush; id_pipe->base.is_texture_referenced = identity_is_texture_referenced; id_pipe->base.is_buffer_referenced = identity_is_buffer_referenced; + id_pipe->base.get_tex_transfer = identity_context_get_tex_transfer; + id_pipe->base.tex_transfer_destroy = identity_context_tex_transfer_destroy; + id_pipe->base.transfer_map = identity_context_transfer_map; + id_pipe->base.transfer_unmap = identity_context_transfer_unmap; id_pipe->pipe = pipe; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index 2b1a60c1bf..d37fb0042e 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -30,6 +30,7 @@ #include "id_screen.h" #include "id_objects.h" +#include "id_context.h" struct pipe_buffer * identity_buffer_create(struct identity_screen *id_screen, @@ -142,7 +143,8 @@ identity_surface_destroy(struct identity_surface *id_surface) struct pipe_transfer * -identity_transfer_create(struct identity_texture *id_texture, +identity_transfer_create(struct identity_context *id_context, + struct identity_texture *id_texture, struct pipe_transfer *transfer) { struct identity_transfer *id_transfer; @@ -159,25 +161,25 @@ identity_transfer_create(struct identity_texture *id_texture, memcpy(&id_transfer->base, transfer, sizeof(struct pipe_transfer)); id_transfer->base.texture = NULL; - pipe_texture_reference(&id_transfer->base.texture, &id_texture->base); id_transfer->transfer = transfer; + + pipe_texture_reference(&id_transfer->base.texture, &id_texture->base); assert(id_transfer->base.texture == &id_texture->base); return &id_transfer->base; error: - transfer->texture->screen->tex_transfer_destroy(transfer); + id_context->pipe->tex_transfer_destroy(id_context->pipe, transfer); return NULL; } void -identity_transfer_destroy(struct identity_transfer *id_transfer) +identity_transfer_destroy(struct identity_context *id_context, + struct identity_transfer *id_transfer) { - struct identity_screen *id_screen = identity_screen(id_transfer->base.texture->screen); - struct pipe_screen *screen = id_screen->screen; - pipe_texture_reference(&id_transfer->base.texture, NULL); - screen->tex_transfer_destroy(id_transfer->transfer); + id_context->pipe->tex_transfer_destroy(id_context->pipe, + id_transfer->transfer); FREE(id_transfer); } diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index 77cc719079..7333ecfb7f 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -35,6 +35,7 @@ #include "id_screen.h" +struct identity_context; struct identity_buffer { @@ -64,6 +65,7 @@ struct identity_transfer { struct pipe_transfer base; + struct pipe_context *pipe; struct pipe_transfer *transfer; }; @@ -177,11 +179,13 @@ void identity_surface_destroy(struct identity_surface *id_surface); struct pipe_transfer * -identity_transfer_create(struct identity_texture *id_texture, +identity_transfer_create(struct identity_context *id_context, + struct identity_texture *id_texture, struct pipe_transfer *transfer); void -identity_transfer_destroy(struct identity_transfer *id_transfer); +identity_transfer_destroy(struct identity_context *id_context, + struct identity_transfer *id_transfer); struct pipe_video_surface * identity_video_surface_create(struct identity_screen *id_screen, diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index b9d0f003d7..419b146578 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -207,71 +207,6 @@ identity_screen_tex_surface_destroy(struct pipe_surface *_surface) identity_surface_destroy(identity_surface(_surface)); } -static struct pipe_transfer * -identity_screen_get_tex_transfer(struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned face, - unsigned level, - unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, - unsigned y, - unsigned w, - unsigned h) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_texture *id_texture = identity_texture(_texture); - struct pipe_screen *screen = id_screen->screen; - struct pipe_texture *texture = id_texture->texture; - struct pipe_transfer *result; - - result = screen->get_tex_transfer(screen, - texture, - face, - level, - zslice, - usage, - x, - y, - w, - h); - - if (result) - return identity_transfer_create(id_texture, result); - return NULL; -} - -static void -identity_screen_tex_transfer_destroy(struct pipe_transfer *_transfer) -{ - identity_transfer_destroy(identity_transfer(_transfer)); -} - -static void * -identity_screen_transfer_map(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_transfer *id_transfer = identity_transfer(_transfer); - struct pipe_screen *screen = id_screen->screen; - struct pipe_transfer *transfer = id_transfer->transfer; - - return screen->transfer_map(screen, - transfer); -} - -static void -identity_screen_transfer_unmap(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_transfer *id_transfer = identity_transfer(_transfer); - struct pipe_screen *screen = id_screen->screen; - struct pipe_transfer *transfer = id_transfer->transfer; - - screen->transfer_unmap(screen, - transfer); -} static struct pipe_buffer * identity_screen_buffer_create(struct pipe_screen *_screen, @@ -488,10 +423,6 @@ identity_screen_create(struct pipe_screen *screen) id_screen->base.texture_destroy = identity_screen_texture_destroy; id_screen->base.get_tex_surface = identity_screen_get_tex_surface; id_screen->base.tex_surface_destroy = identity_screen_tex_surface_destroy; - id_screen->base.get_tex_transfer = identity_screen_get_tex_transfer; - id_screen->base.tex_transfer_destroy = identity_screen_tex_transfer_destroy; - id_screen->base.transfer_map = identity_screen_transfer_map; - id_screen->base.transfer_unmap = identity_screen_transfer_unmap; id_screen->base.buffer_create = identity_screen_buffer_create; id_screen->base.user_buffer_create = identity_screen_user_buffer_create; if (screen->buffer_map) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 41ac1cee72..89c06ea3ad 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -55,7 +55,7 @@ testprogs := lp_test_format \ LIBS += $(GL_LIB_DEPS) -L. -lllvmpipe -L../../auxiliary/ -lgallium -$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a - $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group +#$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a +# $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group -default: $(testprogs) +#default: $(testprogs) diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index bf4c9a5727..3c3fd386b5 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -12,7 +12,11 @@ Done so far is: - depth testing - - texture sampling (not all state/formats are supported) + - texture sampling + - 1D/2D/3D/cube maps supported + - all texture wrap modes supported + - all texture filtering modes supported + - perhaps not all texture formats yet supported - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception @@ -37,8 +41,6 @@ To do (probably by this order): - code generate stipple and stencil testing - - translate the remaining bits of texture sampling state - - translate TGSI control flow instructions, and all other remaining opcodes - integrate with the draw module for VS code generation @@ -57,7 +59,7 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.6. + - LLVM 2.6 (or later) For Linux, on a recent Debian based distribution do: @@ -67,6 +69,9 @@ Requirements http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment variable to the extracted path. + The version of LLVM from SVN ("2.7svn") from mid-March 2010 seems pretty + stable and has some features not in version 2.6. + - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -140,11 +145,13 @@ Development Notes then skim through the lp_bld_* functions called in there, and the comments at the top of the lp_bld_*.c functions. -- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a stand-alone Gallium state -> LLVM IR translation module. +- The driver-independent parts of the LLVM / Gallium code are found in + src/gallium/auxiliary/gallivm/. The filenames and function prefixes + need to be renamed from "lp_bld_" to something else though. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html for a stand-alone example. + See the llvm-c/Core.h file for reference. diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c index dab20cb639..6e0f37393e 100644 --- a/src/gallium/drivers/llvmpipe/lp_buffer.c +++ b/src/gallium/drivers/llvmpipe/lp_buffer.c @@ -33,7 +33,6 @@ #include "lp_screen.h" #include "lp_buffer.h" -#include "state_tracker/sw_winsys.h" static void * llvmpipe_buffer_map(struct pipe_screen *screen, diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index d94efec16a..945e3e0558 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -173,6 +173,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced; llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_context_texture_funcs( &llvmpipe->pipe ); /* * Create drawing context and plug our rendering stage into it. diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 217ec59b68..f391871b0e 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -45,7 +45,7 @@ struct draw_stage; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; -struct setup_context; +struct lp_setup_context; struct lp_velems_state; struct llvmpipe_context { @@ -98,7 +98,7 @@ struct llvmpipe_context { int psize_slot; /** The tiling engine */ - struct setup_context *setup; + struct lp_setup_context *setup; /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index bf832433be..636d72a9bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -79,12 +79,12 @@ llvmpipe_flush( struct pipe_context *pipe, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); } if (0) { util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); - debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); } ++frame_no; @@ -92,3 +92,68 @@ llvmpipe_flush( struct pipe_context *pipe, #endif } + +/** + * Flush context if necessary. + * + * TODO: move this logic to an auxiliary library? + * + * FIXME: We must implement DISCARD/DONTBLOCK/UNSYNCHRONIZED/etc for + * textures to avoid blocking. + */ +boolean +llvmpipe_flush_texture(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_flush) +{ + struct pipe_fence_handle *last_fence = NULL; + unsigned referenced; + + referenced = pipe->is_texture_referenced(pipe, texture, face, level); + + if ((referenced & PIPE_REFERENCED_FOR_WRITE) || + ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + + if (do_not_flush) + return FALSE; + + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. + */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; + + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + + if (cpu_access) { + /* + * Flush and wait. + */ + + struct pipe_fence_handle *fence = NULL; + + pipe->flush(pipe, flush_flags, &fence); + + if (last_fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } else { + /* + * Just flush. + */ + + pipe->flush(pipe, flush_flags, NULL); + } + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 10b2b52583..e13f57ccec 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -28,10 +28,22 @@ #ifndef LP_FLUSH_H #define LP_FLUSH_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_fence_handle; void llvmpipe_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence); +boolean +llvmpipe_flush_texture(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_flush); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index dd9a8e8856..81ea11a16b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -62,18 +62,20 @@ lp_rast_begin( struct lp_rasterizer *rast, rast->state.write_color = write_color; for (i = 0; i < rast->state.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; rast->cbuf[i].map = scene->cbuf_map[i]; - rast->cbuf[i].format = scene->cbuf_transfer[i]->texture->format; - rast->cbuf[i].width = scene->cbuf_transfer[i]->width; - rast->cbuf[i].height = scene->cbuf_transfer[i]->height; - rast->cbuf[i].stride = scene->cbuf_transfer[i]->stride; + rast->cbuf[i].format = cbuf->texture->format; + rast->cbuf[i].width = cbuf->width; + rast->cbuf[i].height = cbuf->height; + rast->cbuf[i].stride = llvmpipe_texture_stride(cbuf->texture, cbuf->level); } if (write_zstencil) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; rast->zsbuf.map = scene->zsbuf_map; - rast->zsbuf.stride = scene->zsbuf_transfer->stride; + rast->zsbuf.stride = llvmpipe_texture_stride(zsbuf->texture, zsbuf->level); rast->zsbuf.blocksize = - util_format_get_blocksize(scene->zsbuf_transfer->texture->format); + util_format_get_blocksize(zsbuf->texture->format); } lp_scene_bin_iter_begin( scene ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index dc5fc5fc7d..303f6e3f7e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -95,7 +95,7 @@ struct lp_rast_shader_inputs { * Rasterization information for a triangle known to be in this bin, * plus inputs to run the shader: * These fields are tile- and bin-independent. - * Objects of this type are put into the setup_context::data buffer. + * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { #ifdef DEBUG diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 72492c0f0c..681ce674d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -397,7 +397,6 @@ end: static boolean lp_scene_map_buffers( struct lp_scene *scene ) { - struct pipe_screen *screen = scene->pipe->screen; struct pipe_surface *cbuf, *zsbuf; int i; @@ -409,20 +408,10 @@ lp_scene_map_buffers( struct lp_scene *scene ) for (i = 0; i < scene->fb.nr_cbufs; i++) { cbuf = scene->fb.cbufs[i]; if (cbuf) { - scene->cbuf_transfer[i] = screen->get_tex_transfer(screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - cbuf->width, - cbuf->height); - if (!scene->cbuf_transfer[i]) - goto fail; - - scene->cbuf_map[i] = screen->transfer_map(screen, - scene->cbuf_transfer[i]); + scene->cbuf_map[i] = llvmpipe_texture_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); if (!scene->cbuf_map[i]) goto fail; } @@ -432,20 +421,10 @@ lp_scene_map_buffers( struct lp_scene *scene ) */ zsbuf = scene->fb.zsbuf; if (zsbuf) { - scene->zsbuf_transfer = screen->get_tex_transfer(screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - zsbuf->width, - zsbuf->height); - if (!scene->zsbuf_transfer) - goto fail; - - scene->zsbuf_map = screen->transfer_map(screen, - scene->zsbuf_transfer); + scene->zsbuf_map = llvmpipe_texture_map(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); if (!scene->zsbuf_map) goto fail; } @@ -469,28 +448,27 @@ fail: static void lp_scene_unmap_buffers( struct lp_scene *scene ) { - struct pipe_screen *screen = scene->pipe->screen; unsigned i; for (i = 0; i < scene->fb.nr_cbufs; i++) { - if (scene->cbuf_map[i]) - screen->transfer_unmap(screen, scene->cbuf_transfer[i]); - - if (scene->cbuf_transfer[i]) - screen->tex_transfer_destroy(scene->cbuf_transfer[i]); - - scene->cbuf_transfer[i] = NULL; - scene->cbuf_map[i] = NULL; + if (scene->cbuf_map[i]) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_texture_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); + scene->cbuf_map[i] = NULL; + } } - if (scene->zsbuf_map) - screen->transfer_unmap(screen, scene->zsbuf_transfer); - - if (scene->zsbuf_transfer) - screen->tex_transfer_destroy(scene->zsbuf_transfer); - - scene->zsbuf_transfer = NULL; - scene->zsbuf_map = NULL; + if (scene->zsbuf_map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_texture_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + scene->zsbuf_map = NULL; + } util_unreference_framebuffer_state( &scene->fb ); } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index 739ac22908..b602b1e8a0 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -114,8 +114,6 @@ struct texture_ref { */ struct lp_scene { struct pipe_context *pipe; - struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; - struct pipe_transfer *zsbuf_transfer; /* Scene's buffers are mapped at the time the scene is enqueued: */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 5093f58bb1..f1bbc2092c 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -204,8 +204,10 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return FALSE; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if(!winsys->is_displaytarget_format_supported(winsys, format)) + if(tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { + if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) return FALSE; } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index d977f98cfa..af25e043cc 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -34,7 +34,7 @@ #ifndef LP_SCREEN_H #define LP_SCREEN_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include <llvm-c/ExecutionEngine.h> #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index c870f89d01..16128c34c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -52,11 +52,11 @@ #include "draw/draw_vbuf.h" -static void set_scene_state( struct setup_context *, unsigned ); +static void set_scene_state( struct lp_setup_context *, unsigned ); struct lp_scene * -lp_setup_get_current_scene(struct setup_context *setup) +lp_setup_get_current_scene(struct lp_setup_context *setup) { if (!setup->scene) { @@ -74,7 +74,7 @@ lp_setup_get_current_scene(struct setup_context *setup) static void -first_triangle( struct setup_context *setup, +first_triangle( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]) @@ -85,7 +85,7 @@ first_triangle( struct setup_context *setup, } static void -first_line( struct setup_context *setup, +first_line( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { @@ -95,7 +95,7 @@ first_line( struct setup_context *setup, } static void -first_point( struct setup_context *setup, +first_point( struct lp_setup_context *setup, const float (*v0)[4]) { set_scene_state( setup, SETUP_ACTIVE ); @@ -103,7 +103,7 @@ first_point( struct setup_context *setup, setup->point( setup, v0 ); } -static void reset_context( struct setup_context *setup ) +static void reset_context( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -131,7 +131,7 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all scene's bins */ static void -lp_setup_rasterize_scene( struct setup_context *setup, +lp_setup_rasterize_scene( struct lp_setup_context *setup, boolean write_depth ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -148,7 +148,7 @@ lp_setup_rasterize_scene( struct setup_context *setup, static void -begin_binning( struct setup_context *setup ) +begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -184,7 +184,7 @@ begin_binning( struct setup_context *setup ) * TODO: fast path for fullscreen clears and no triangles. */ static void -execute_clears( struct setup_context *setup ) +execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -194,7 +194,7 @@ execute_clears( struct setup_context *setup ) static void -set_scene_state( struct setup_context *setup, +set_scene_state( struct lp_setup_context *setup, unsigned new_state ) { unsigned old_state = setup->state; @@ -229,7 +229,7 @@ set_scene_state( struct setup_context *setup, void -lp_setup_flush( struct setup_context *setup, +lp_setup_flush( struct lp_setup_context *setup, unsigned flags ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -239,7 +239,7 @@ lp_setup_flush( struct setup_context *setup, void -lp_setup_bind_framebuffer( struct setup_context *setup, +lp_setup_bind_framebuffer( struct lp_setup_context *setup, const struct pipe_framebuffer_state *fb ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -256,7 +256,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup, void -lp_setup_clear( struct setup_context *setup, +lp_setup_clear( struct lp_setup_context *setup, const float *color, double depth, unsigned stencil, @@ -314,7 +314,7 @@ lp_setup_clear( struct setup_context *setup, * Emit a fence. */ struct pipe_fence_handle * -lp_setup_fence( struct setup_context *setup ) +lp_setup_fence( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ @@ -334,10 +334,11 @@ lp_setup_fence( struct setup_context *setup ) void -lp_setup_set_triangle_state( struct setup_context *setup, +lp_setup_set_triangle_state( struct lp_setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface, - boolean scissor ) + boolean scissor, + boolean gl_rasterization_rules) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -345,12 +346,13 @@ lp_setup_set_triangle_state( struct setup_context *setup, setup->cullmode = cull_mode; setup->triangle = first_triangle; setup->scissor_test = scissor; + setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f; } void -lp_setup_set_fs_inputs( struct setup_context *setup, +lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { @@ -361,7 +363,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs_functions( struct setup_context *setup, +lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function0, lp_jit_frag_func jit_function1, boolean opaque ) @@ -376,7 +378,7 @@ lp_setup_set_fs_functions( struct setup_context *setup, } void -lp_setup_set_fs_constants(struct setup_context *setup, +lp_setup_set_fs_constants(struct lp_setup_context *setup, struct pipe_buffer *buffer) { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer); @@ -388,7 +390,7 @@ lp_setup_set_fs_constants(struct setup_context *setup, void -lp_setup_set_alpha_ref_value( struct setup_context *setup, +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, float alpha_ref_value ) { LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value); @@ -400,7 +402,7 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, } void -lp_setup_set_blend_color( struct setup_context *setup, +lp_setup_set_blend_color( struct lp_setup_context *setup, const struct pipe_blend_color *blend_color ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -415,7 +417,7 @@ lp_setup_set_blend_color( struct setup_context *setup, void -lp_setup_set_scissor( struct setup_context *setup, +lp_setup_set_scissor( struct lp_setup_context *setup, const struct pipe_scissor_state *scissor ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -430,7 +432,7 @@ lp_setup_set_scissor( struct setup_context *setup, void -lp_setup_set_flatshade_first( struct setup_context *setup, +lp_setup_set_flatshade_first( struct lp_setup_context *setup, boolean flatshade_first ) { setup->flatshade_first = flatshade_first; @@ -438,7 +440,7 @@ lp_setup_set_flatshade_first( struct setup_context *setup, void -lp_setup_set_vertex_info( struct setup_context *setup, +lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *vertex_info ) { /* XXX: just silently holding onto the pointer: @@ -451,7 +453,7 @@ lp_setup_set_vertex_info( struct setup_context *setup, * Called during state validation when LP_NEW_TEXTURE is set. */ void -lp_setup_set_sampler_textures( struct setup_context *setup, +lp_setup_set_sampler_textures( struct lp_setup_context *setup, unsigned num, struct pipe_texture **texture) { unsigned i; @@ -512,7 +514,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, * being rendered and the current scene being built. */ unsigned -lp_setup_is_texture_referenced( const struct setup_context *setup, +lp_setup_is_texture_referenced( const struct lp_setup_context *setup, const struct pipe_texture *texture ) { unsigned i; @@ -541,7 +543,7 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, * Called by vbuf code when we're about to draw something. */ void -lp_setup_update_state( struct setup_context *setup ) +lp_setup_update_state( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -659,7 +661,7 @@ lp_setup_update_state( struct setup_context *setup ) /* Only caller is lp_setup_vbuf_destroy() */ void -lp_setup_destroy( struct setup_context *setup ) +lp_setup_destroy( struct lp_setup_context *setup ) { reset_context( setup ); @@ -684,12 +686,12 @@ lp_setup_destroy( struct setup_context *setup ) * the draw module. Currently also creates a rasterizer to use with * it. */ -struct setup_context * +struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { unsigned i; - struct setup_context *setup = CALLOC_STRUCT(setup_context); + struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context); if (!setup) return NULL; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 17c112b528..be1bf96f12 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -61,78 +61,79 @@ struct pipe_framebuffer_state; struct lp_fragment_shader; struct lp_jit_context; -struct setup_context * +struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ); void -lp_setup_clear(struct setup_context *setup, +lp_setup_clear(struct lp_setup_context *setup, const float *clear_color, double clear_depth, unsigned clear_stencil, unsigned flags); struct pipe_fence_handle * -lp_setup_fence( struct setup_context *setup ); +lp_setup_fence( struct lp_setup_context *setup ); void -lp_setup_flush( struct setup_context *setup, +lp_setup_flush( struct lp_setup_context *setup, unsigned flags ); void -lp_setup_bind_framebuffer( struct setup_context *setup, +lp_setup_bind_framebuffer( struct lp_setup_context *setup, const struct pipe_framebuffer_state *fb ); void -lp_setup_set_triangle_state( struct setup_context *setup, +lp_setup_set_triangle_state( struct lp_setup_context *setup, unsigned cullmode, boolean front_is_ccw, - boolean scissor ); + boolean scissor, + boolean gl_rasterization_rules ); void -lp_setup_set_fs_inputs( struct setup_context *setup, +lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, unsigned nr ); void -lp_setup_set_fs_functions( struct setup_context *setup, +lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function0, lp_jit_frag_func jit_function1, boolean opaque ); void -lp_setup_set_fs_constants(struct setup_context *setup, +lp_setup_set_fs_constants(struct lp_setup_context *setup, struct pipe_buffer *buffer); void -lp_setup_set_alpha_ref_value( struct setup_context *setup, +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, float alpha_ref_value ); void -lp_setup_set_blend_color( struct setup_context *setup, +lp_setup_set_blend_color( struct lp_setup_context *setup, const struct pipe_blend_color *blend_color ); void -lp_setup_set_scissor( struct setup_context *setup, +lp_setup_set_scissor( struct lp_setup_context *setup, const struct pipe_scissor_state *scissor ); void -lp_setup_set_sampler_textures( struct setup_context *setup, +lp_setup_set_sampler_textures( struct lp_setup_context *setup, unsigned num, struct pipe_texture **texture); unsigned -lp_setup_is_texture_referenced( const struct setup_context *setup, +lp_setup_is_texture_referenced( const struct lp_setup_context *setup, const struct pipe_texture *texture ); void -lp_setup_set_flatshade_first( struct setup_context *setup, +lp_setup_set_flatshade_first( struct lp_setup_context *setup, boolean flatshade_first ); void -lp_setup_set_vertex_info( struct setup_context *setup, +lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *info ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a5fc34e54a..464fb36984 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -65,7 +65,7 @@ struct lp_scene_queue; * Subclass of vbuf_render, plugged directly into the draw module as * the rendering backend. */ -struct setup_context +struct lp_setup_context { struct vbuf_render base; @@ -89,6 +89,7 @@ struct setup_context boolean ccw_is_frontface; boolean scissor_test; unsigned cullmode; + float pixel_offset; struct pipe_framebuffer_state fb; @@ -131,29 +132,29 @@ struct setup_context unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ - void (*point)( struct setup_context *, + void (*point)( struct lp_setup_context *, const float (*v0)[4]); - void (*line)( struct setup_context *, + void (*line)( struct lp_setup_context *, const float (*v0)[4], const float (*v1)[4]); - void (*triangle)( struct setup_context *, + void (*triangle)( struct lp_setup_context *, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]); }; -void lp_setup_choose_triangle( struct setup_context *setup ); -void lp_setup_choose_line( struct setup_context *setup ); -void lp_setup_choose_point( struct setup_context *setup ); +void lp_setup_choose_triangle( struct lp_setup_context *setup ); +void lp_setup_choose_line( struct lp_setup_context *setup ); +void lp_setup_choose_point( struct lp_setup_context *setup ); -struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); +struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup); -void lp_setup_init_vbuf(struct setup_context *setup); +void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct setup_context *setup ); +void lp_setup_update_state( struct lp_setup_context *setup ); -void lp_setup_destroy( struct setup_context *setup ); +void lp_setup_destroy( struct lp_setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index feea79d394..be41c44e6f 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -31,7 +31,7 @@ #include "lp_setup_context.h" -static void line_nop( struct setup_context *setup, +static void line_nop( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4] ) { @@ -39,7 +39,7 @@ static void line_nop( struct setup_context *setup, void -lp_setup_choose_line( struct setup_context *setup ) +lp_setup_choose_line( struct lp_setup_context *setup ) { setup->line = line_nop; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index f03ca729b2..9f69e6c5ce 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -31,14 +31,14 @@ #include "lp_setup_context.h" -static void point_nop( struct setup_context *setup, +static void point_nop( struct lp_setup_context *setup, const float (*v0)[4] ) { } void -lp_setup_choose_point( struct setup_context *setup ) +lp_setup_choose_point( struct lp_setup_context *setup ) { setup->point = point_nop; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index e75412ac9a..ac6264dc73 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -41,7 +41,8 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_rast_triangle *tri, +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, unsigned slot, const float value, unsigned i ) @@ -56,7 +57,8 @@ static void constant_coef( struct lp_rast_triangle *tri, * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. */ -static void linear_coef( struct lp_rast_triangle *tri, +static void linear_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -90,8 +92,8 @@ static void linear_coef( struct lp_rast_triangle *tri, * instead - i'll switch to this later. */ tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); } @@ -103,7 +105,8 @@ static void linear_coef( struct lp_rast_triangle *tri, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct lp_rast_triangle *tri, +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -125,8 +128,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); } @@ -137,7 +140,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, +setup_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -153,27 +157,28 @@ setup_fragcoord_coef(struct lp_rast_triangle *tri, tri->inputs.dadx[slot][1] = 0.0; tri->inputs.dady[slot][1] = 1.0; /*Z*/ - linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); /*W*/ - linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); } -static void setup_facing_coef( struct lp_rast_triangle *tri, +static void setup_facing_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, unsigned slot, boolean frontface ) { - constant_coef( tri, slot, 1.0f - frontface, 0 ); - constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ - constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ - constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ + constant_coef( setup, tri, slot, 1.0f - frontface, 0 ); + constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ } /** * Compute the tri->coef[] array dadx, dady, a0 values. */ -static void setup_tri_coefficients( struct setup_context *setup, +static void setup_tri_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, float oneoverarea, const float (*v1)[4], @@ -185,7 +190,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); + setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3); /* setup interpolation for all the remaining attributes: */ @@ -196,27 +201,27 @@ static void setup_tri_coefficients( struct setup_context *setup, switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(tri, slot+1, v3[vert_attr][i], i); + constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_POSITION: /* XXX: fix me - duplicates the values in slot zero. */ - setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); + setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: - setup_facing_coef(tri, slot+1, frontface); + setup_facing_coef(setup, tri, slot+1, frontface); break; default: @@ -274,19 +279,19 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) * bins for the tiles which we overlap. */ static void -do_triangle_ccw(struct setup_context *setup, +do_triangle_ccw(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { /* x/y positions in fixed point */ - const int x1 = subpixel_snap(v1[0][0]); - const int x2 = subpixel_snap(v2[0][0]); - const int x3 = subpixel_snap(v3[0][0]); - const int y1 = subpixel_snap(v1[0][1]); - const int y2 = subpixel_snap(v2[0][1]); - const int y3 = subpixel_snap(v3[0][1]); + const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset); + const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset); + const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset); + const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset); + const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset); + const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset); struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *tri; @@ -565,7 +570,7 @@ do_triangle_ccw(struct setup_context *setup, } -static void triangle_cw( struct setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -574,7 +579,7 @@ static void triangle_cw( struct setup_context *setup, } -static void triangle_ccw( struct setup_context *setup, +static void triangle_ccw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -583,7 +588,7 @@ static void triangle_ccw( struct setup_context *setup, } -static void triangle_both( struct setup_context *setup, +static void triangle_both( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -602,7 +607,7 @@ static void triangle_both( struct setup_context *setup, } -static void triangle_nop( struct setup_context *setup, +static void triangle_nop( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -611,7 +616,7 @@ static void triangle_nop( struct setup_context *setup, void -lp_setup_choose_triangle( struct setup_context *setup ) +lp_setup_choose_triangle( struct lp_setup_context *setup ) { switch (setup->cullmode) { case PIPE_WINDING_NONE: diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 671e74465c..d7336d82b2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -48,10 +48,10 @@ /** cast wrapper */ -static struct setup_context * -setup_context(struct vbuf_render *vbr) +static struct lp_setup_context * +lp_setup_context(struct vbuf_render *vbr) { - return (struct setup_context *) vbr; + return (struct lp_setup_context *) vbr; } @@ -59,7 +59,7 @@ setup_context(struct vbuf_render *vbr) static const struct vertex_info * lp_setup_get_vertex_info(struct vbuf_render *vbr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); return setup->vertex_info; } @@ -68,7 +68,7 @@ static boolean lp_setup_allocate_vertices(struct vbuf_render *vbr, ushort vertex_size, ushort nr_vertices) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); unsigned size = vertex_size * nr_vertices; if (setup->vertex_buffer_size < size) { @@ -92,7 +92,7 @@ lp_setup_release_vertices(struct vbuf_render *vbr) static void * lp_setup_map_vertices(struct vbuf_render *vbr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); return setup->vertex_buffer; } @@ -101,7 +101,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr, ushort min_index, ushort max_index ) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size ); /* do nothing */ } @@ -110,7 +110,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr, static boolean lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim) { - setup_context(vbr)->prim = prim; + lp_setup_context(vbr)->prim = prim; return TRUE; } @@ -129,7 +129,7 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer, static void lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = setup->vertex_buffer; unsigned i; @@ -284,7 +284,7 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) static void lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = (void *) get_vert(setup->vertex_buffer, start, stride); @@ -436,7 +436,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_setup_vbuf_destroy(struct vbuf_render *vbr) { - lp_setup_destroy(setup_context(vbr)); + lp_setup_destroy(lp_setup_context(vbr)); } @@ -444,7 +444,7 @@ lp_setup_vbuf_destroy(struct vbuf_render *vbr) * Create the post-transform vertex handler for the given context. */ void -lp_setup_init_vbuf(struct setup_context *setup) +lp_setup_init_vbuf(struct lp_setup_context *setup) { setup->base.max_indices = LP_MAX_VBUF_INDEXES; setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index a5a1a72074..34dd5234ce 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,7 +31,7 @@ #ifndef LP_STATE_H #define LP_STATE_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 9a8de0edfd..64f988d6d0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -252,7 +252,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), ""); - in_out_mask = lp_build_int_const_scalar(i32_type, ~0); + in_out_mask = lp_build_const_int_vec(i32_type, ~0); lp_build_flow_scope_declare(flow, &in_out_mask); @@ -367,7 +367,7 @@ build_int32_vec_const(int value) i32_type.norm = FALSE; /* values are not normalized */ i32_type.width = 32; /* 32-bit int values */ i32_type.length = 4; /* 4 elements per vector */ - return lp_build_int_const_scalar(i32_type, value); + return lp_build_const_int_vec(i32_type, value); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index feb012816c..6df3ef25b0 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -62,7 +62,8 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, lp_setup_set_triangle_state( llvmpipe->setup, llvmpipe->rasterizer->cull_mode, llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, - llvmpipe->rasterizer->scissor); + llvmpipe->rasterizer->scissor, + llvmpipe->rasterizer->gl_rasterization_rules); } llvmpipe->dirty |= LP_NEW_RASTERIZER; diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 6110b0a193..ca3d62c361 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -27,6 +27,7 @@ #include "util/u_rect.h" #include "lp_context.h" +#include "lp_flush.h" #include "lp_surface.h" @@ -36,6 +37,20 @@ lp_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { + llvmpipe_flush_texture(pipe, + dest->texture, dest->face, dest->level, + 0, /* flush_flags */ + FALSE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_flush */ + + llvmpipe_flush_texture(pipe, + src->texture, src->face, src->level, + 0, /* flush_flags */ + TRUE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_flush */ + util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index 1df9897898..338a04a487 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -41,7 +41,7 @@ #include <stdio.h> #include <float.h> -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include <llvm-c/Analysis.h> #include <llvm-c/ExecutionEngine.h> #include <llvm-c/Target.h> diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 2c4d7fb6e1..fb595893bd 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -29,7 +29,7 @@ #include <stdlib.h> #include <stdio.h> -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" #include <llvm-c/Analysis.h> #include <llvm-c/ExecutionEngine.h> #include <llvm-c/Target.h> diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index 799df182b6..1228a831f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,7 +29,7 @@ #define LP_TEX_SAMPLE_H -#include "os/os_llvm.h" +#include "gallivm/lp_bld.h" struct lp_sampler_static_state; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 74b7b4ec5e..93ad789c35 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -40,6 +40,7 @@ #include "lp_context.h" #include "lp_screen.h" +#include "lp_flush.h" #include "lp_texture.h" #include "lp_tile_size.h" #include "state_tracker/sw_winsys.h" @@ -102,6 +103,7 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, unsigned height = align(lpt->base.height0, TILE_SIZE); lpt->dt = winsys->displaytarget_create(winsys, + lpt->base.tex_usage, lpt->base.format, width, height, 16, @@ -163,6 +165,137 @@ llvmpipe_texture_destroy(struct pipe_texture *pt) } +/** + * Map a texture. Without any synchronization. + */ +void * +llvmpipe_texture_map(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice) +{ + struct llvmpipe_texture *lpt = llvmpipe_texture(texture); + uint8_t *map; + + if (lpt->dt) { + /* display target */ + struct llvmpipe_screen *screen = llvmpipe_screen(texture->screen); + struct sw_winsys *winsys = screen->winsys; + const unsigned usage = PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + assert(face == 0); + assert(level == 0); + assert(zslice == 0); + + /* FIXME: keep map count? */ + map = winsys->displaytarget_map(winsys, lpt->dt, usage); + } + else { + /* regular texture */ + unsigned offset; + unsigned stride; + + map = lpt->data; + + assert(level < LP_MAX_TEXTURE_2D_LEVELS); + + offset = lpt->level_offset[level]; + stride = lpt->stride[level]; + + /* XXX shouldn't that rather be + tex_height = align(u_minify(texture->height0, level), 2) + to account for alignment done in llvmpipe_texture_layout ? + */ + if (texture->target == PIPE_TEXTURE_CUBE) { + unsigned tex_height = u_minify(texture->height0, level); + offset += face * util_format_get_nblocksy(texture->format, tex_height) * stride; + } + else if (texture->target == PIPE_TEXTURE_3D) { + unsigned tex_height = u_minify(texture->height0, level); + offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + map += offset; + } + + return map; +} + + +/** + * Unmap a texture. Without any synchronization. + */ +void +llvmpipe_texture_unmap(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice) +{ + struct llvmpipe_texture *lpt = llvmpipe_texture(texture); + + if (lpt->dt) { + /* display target */ + struct llvmpipe_screen *lp_screen = llvmpipe_screen(texture->screen); + struct sw_winsys *winsys = lp_screen->winsys; + + assert(face == 0); + assert(level == 0); + assert(zslice == 0); + + winsys->displaytarget_unmap(winsys, lpt->dt); + } +} + + +static struct pipe_texture * +llvmpipe_texture_from_handle(struct pipe_screen *screen, + const struct pipe_texture *template, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys; + struct llvmpipe_texture *lpt = CALLOC_STRUCT(llvmpipe_texture); + if (!lpt) + return NULL; + + lpt->base = *template; + pipe_reference_init(&lpt->base.reference, 1); + lpt->base.screen = screen; + + lpt->dt = winsys->displaytarget_from_handle(winsys, + template, + whandle, + &lpt->stride[0]); + if (!lpt->dt) + goto fail; + + return &lpt->base; + + fail: + FREE(lpt); + return NULL; +} + + +static boolean +llvmpipe_texture_get_handle(struct pipe_screen *screen, + struct pipe_texture *pt, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys; + struct llvmpipe_texture *lpt = llvmpipe_texture(pt); + + assert(lpt->dt); + if (!lpt->dt) + return FALSE; + + return winsys->displaytarget_get_handle(winsys, lpt->dt, whandle); +} + + static struct pipe_surface * llvmpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, @@ -181,7 +314,6 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = lpt->level_offset[level]; ps->usage = usage; /* Because we are llvmpipe, anything that the state tracker @@ -207,23 +339,6 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->face = face; ps->level = level; ps->zslice = zslice; - - /* XXX shouldn't that rather be - tex_height = align(ps->height, 2); - to account for alignment done in llvmpipe_texture_layout ? - */ - if (pt->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = ps->height; - ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - unsigned tex_height = ps->height; - ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } } return ps; } @@ -243,7 +358,7 @@ llvmpipe_tex_surface_destroy(struct pipe_surface *surf) static struct pipe_transfer * -llvmpipe_get_tex_transfer(struct pipe_screen *screen, +llvmpipe_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, @@ -269,24 +384,6 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, pt->level = level; pt->zslice = zslice; - lpt->offset = lptex->level_offset[level]; - - /* XXX shouldn't that rather be - tex_height = align(u_minify(texture->height0, level), 2) - to account for alignment done in llvmpipe_texture_layout ? - */ - if (texture->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = u_minify(texture->height0, level); - lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; - } - else if (texture->target == PIPE_TEXTURE_3D) { - unsigned tex_height = u_minify(texture->height0, level); - lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; - } - else { - assert(face == 0); - assert(zslice == 0); - } return pt; } return NULL; @@ -294,7 +391,8 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, static void -llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer) +llvmpipe_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is @@ -307,11 +405,11 @@ llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer) static void * -llvmpipe_transfer_map( struct pipe_screen *_screen, +llvmpipe_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - ubyte *map, *xfer_map; + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + ubyte *map; struct llvmpipe_texture *lpt; enum pipe_format format; @@ -319,52 +417,45 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, lpt = llvmpipe_texture(transfer->texture); format = lpt->base.format; - if (lpt->dt) { - /* display target */ - struct sw_winsys *winsys = screen->winsys; + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + llvmpipe_flush_texture(pipe, + transfer->texture, transfer->face, transfer->level, + 0, /* flush_flags */ + !(transfer->usage & PIPE_TRANSFER_WRITE), /* read_only */ + TRUE, /* cpu_access */ + FALSE); /* do_not_flush */ - map = winsys->displaytarget_map(winsys, lpt->dt, - pipe_transfer_buffer_flags(transfer)); - if (map == NULL) - return NULL; - } - else { - /* regular texture */ - map = lpt->data; - } + map = llvmpipe_texture_map(transfer->texture, + transfer->face, transfer->level, transfer->zslice); /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* Do something to notify sharing contexts of a texture change. */ screen->timestamp++; } - xfer_map = map + llvmpipe_transfer(transfer)->offset + + map += transfer->y / util_format_get_blockheight(format) * transfer->stride + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); - /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ - return xfer_map; + + return map; } static void -llvmpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) +llvmpipe_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) { - struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen); - struct llvmpipe_texture *lpt; - assert(transfer->texture); - lpt = llvmpipe_texture(transfer->texture); - if (lpt->dt) { - /* display target */ - struct sw_winsys *winsys = lp_screen->winsys; - winsys->displaytarget_unmap(winsys, lpt->dt); - } + llvmpipe_texture_unmap(transfer->texture, + transfer->face, transfer->level, transfer->zslice); } @@ -373,12 +464,18 @@ llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = llvmpipe_texture_create; screen->texture_destroy = llvmpipe_texture_destroy; + screen->texture_get_handle = llvmpipe_texture_get_handle; screen->get_tex_surface = llvmpipe_get_tex_surface; screen->tex_surface_destroy = llvmpipe_tex_surface_destroy; +} - screen->get_tex_transfer = llvmpipe_get_tex_transfer; - screen->tex_transfer_destroy = llvmpipe_tex_transfer_destroy; - screen->transfer_map = llvmpipe_transfer_map; - screen->transfer_unmap = llvmpipe_transfer_unmap; + +void +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe) +{ + pipe->get_tex_transfer = llvmpipe_get_tex_transfer; + pipe->tex_transfer_destroy = llvmpipe_tex_transfer_destroy; + pipe->transfer_map = llvmpipe_transfer_map; + pipe->transfer_unmap = llvmpipe_transfer_unmap; } diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index b23f929b16..2350c26e4f 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -95,8 +95,32 @@ llvmpipe_transfer(struct pipe_transfer *pt) } +static INLINE unsigned +llvmpipe_texture_stride(struct pipe_texture *texture, + unsigned level) +{ + struct llvmpipe_texture *lpt = llvmpipe_texture(texture); + assert(level < LP_MAX_TEXTURE_2D_LEVELS); + return lpt->stride[level]; +} + + +void * +llvmpipe_texture_map(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice); + +void +llvmpipe_texture_unmap(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice); + extern void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); +extern void +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe); #endif /* LP_TEXTURE_H */ diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 0e02680bc6..0cb66041d5 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nouveau C_SOURCES = nouveau_screen.c \ - nouveau_context.c \ - nv04_surface_2d.c + nouveau_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index 7f16e31c3f..ab7761a31d 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -33,7 +33,7 @@ nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp, max = max - (max % 3); break; case PIPE_PRIM_QUADS: - max = max & 3; + max = max & ~3; break; case PIPE_PRIM_LINE_LOOP: case PIPE_PRIM_LINE_STRIP: diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index af9ddd558c..bed014b9ce 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -27,10 +27,7 @@ #define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) extern struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *); - -extern struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *); extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile deleted file mode 100644 index 364c80d8f3..0000000000 --- a/src/gallium/drivers/nv30/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv30 - -C_SOURCES = \ - nv30_clear.c \ - nv30_context.c \ - nv30_draw.c \ - nv30_fragprog.c \ - nv30_fragtex.c \ - nv30_miptree.c \ - nv30_query.c \ - nv30_screen.c \ - nv30_state.c \ - nv30_state_blend.c \ - nv30_state_emit.c \ - nv30_state_fb.c \ - nv30_state_rasterizer.c \ - nv30_state_scissor.c \ - nv30_state_stipple.c \ - nv30_state_viewport.c \ - nv30_state_zsa.c \ - nv30_surface.c \ - nv30_transfer.c \ - nv30_vbo.c \ - nv30_vertprog.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c deleted file mode 100644 index c4ba926664..0000000000 --- a/src/gallium/drivers/nv30/nv30_clear.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_clear.h" - -#include "nv30_context.h" - -void -nv30_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - util_clear(pipe, &nv30_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c deleted file mode 100644 index 279b74445c..0000000000 --- a/src/gallium/drivers/nv30/nv30_context.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" - -#include "nv30_context.h" -#include "nv30_screen.h" - -static void -nv30_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, rankine, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, rankine, 0x1fd8, 1); - OUT_RING (chan, 1); - } - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv30_destroy(struct pipe_context *pipe) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned i; - - for (i = 0; i < NV30_STATE_MAX; i++) { - if (nv30->state.hw[i]) - so_ref(NULL, &nv30->state.hw[i]); - } - - if (nv30->draw) - draw_destroy(nv30->draw); - FREE(nv30); -} - -struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv) -{ - struct nv30_screen *screen = nv30_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nv30_context *nv30; - struct nouveau_winsys *nvws = screen->nvws; - - nv30 = CALLOC(1, sizeof(struct nv30_context)); - if (!nv30) - return NULL; - nv30->screen = screen; - - nv30->nvws = nvws; - - nv30->pipe.winsys = ws; - nv30->pipe.screen = pscreen; - nv30->pipe.priv = priv; - nv30->pipe.destroy = nv30_destroy; - nv30->pipe.draw_arrays = nv30_draw_arrays; - nv30->pipe.draw_elements = nv30_draw_elements; - nv30->pipe.clear = nv30_clear; - nv30->pipe.flush = nv30_flush; - - nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - screen->base.channel->user_private = nv30; - screen->base.channel->flush_notify = nv30_state_flush_notify; - - nv30_init_query_functions(nv30); - nv30_init_surface_functions(nv30); - nv30_init_state_functions(nv30); - - /* Create, configure, and install fallback swtnl path */ - nv30->draw = draw_create(); - draw_wide_point_threshold(nv30->draw, 9999999.0); - draw_wide_line_threshold(nv30->draw, 9999999.0); - draw_enable_line_stipple(nv30->draw, FALSE); - draw_enable_point_sprites(nv30->draw, FALSE); - draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); - - return &nv30->pipe; -} diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h deleted file mode 100644 index 1786460aec..0000000000 --- a/src/gallium/drivers/nv30/nv30_context.h +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef __NV30_CONTEXT_H__ -#define __NV30_CONTEXT_H__ - -#include <stdio.h> - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_inlines.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" -#include "nouveau/nouveau_stateobj.h" - -#include "nv30_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -enum nv30_state_index { - NV30_STATE_FB = 0, - NV30_STATE_VIEWPORT = 1, - NV30_STATE_BLEND = 2, - NV30_STATE_RAST = 3, - NV30_STATE_ZSA = 4, - NV30_STATE_BCOL = 5, - NV30_STATE_CLIP = 6, - NV30_STATE_SCISSOR = 7, - NV30_STATE_STIPPLE = 8, - NV30_STATE_FRAGPROG = 9, - NV30_STATE_VERTPROG = 10, - NV30_STATE_FRAGTEX0 = 11, - NV30_STATE_FRAGTEX1 = 12, - NV30_STATE_FRAGTEX2 = 13, - NV30_STATE_FRAGTEX3 = 14, - NV30_STATE_FRAGTEX4 = 15, - NV30_STATE_FRAGTEX5 = 16, - NV30_STATE_FRAGTEX6 = 17, - NV30_STATE_FRAGTEX7 = 18, - NV30_STATE_FRAGTEX8 = 19, - NV30_STATE_FRAGTEX9 = 20, - NV30_STATE_FRAGTEX10 = 21, - NV30_STATE_FRAGTEX11 = 22, - NV30_STATE_FRAGTEX12 = 23, - NV30_STATE_FRAGTEX13 = 24, - NV30_STATE_FRAGTEX14 = 25, - NV30_STATE_FRAGTEX15 = 26, - NV30_STATE_VERTTEX0 = 27, - NV30_STATE_VERTTEX1 = 28, - NV30_STATE_VERTTEX2 = 29, - NV30_STATE_VERTTEX3 = 30, - NV30_STATE_VTXBUF = 31, - NV30_STATE_VTXFMT = 32, - NV30_STATE_VTXATTR = 33, - NV30_STATE_SR = 34, - NV30_STATE_MAX = 35 -}; - -#include "nv30_screen.h" - -#define NV30_NEW_BLEND (1 << 0) -#define NV30_NEW_RAST (1 << 1) -#define NV30_NEW_ZSA (1 << 2) -#define NV30_NEW_SAMPLER (1 << 3) -#define NV30_NEW_FB (1 << 4) -#define NV30_NEW_STIPPLE (1 << 5) -#define NV30_NEW_SCISSOR (1 << 6) -#define NV30_NEW_VIEWPORT (1 << 7) -#define NV30_NEW_BCOL (1 << 8) -#define NV30_NEW_VERTPROG (1 << 9) -#define NV30_NEW_FRAGPROG (1 << 10) -#define NV30_NEW_ARRAYS (1 << 11) -#define NV30_NEW_UCP (1 << 12) -#define NV30_NEW_SR (1 << 13) - -struct nv30_rasterizer_state { - struct pipe_rasterizer_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv30_zsa_state { - struct pipe_depth_stencil_alpha_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv30_blend_state { - struct pipe_blend_state pipe; - struct nouveau_stateobj *so; -}; - - -struct nv30_state { - unsigned scissor_enabled; - unsigned stipple_enabled; - unsigned fp_samplers; - - uint64_t dirty; - struct nouveau_stateobj *hw[NV30_STATE_MAX]; -}; - -struct nv30_vtxelt_state { - struct pipe_vertex_element pipe[16]; - unsigned num_elements; -}; - -struct nv30_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv30_screen *screen; - - struct draw_context *draw; - - /* HW state derived from pipe states */ - struct nv30_state state; - - /* Context state */ - unsigned dirty; - struct pipe_scissor_state scissor; - unsigned stipple[32]; - struct nv30_vertex_program *vertprog; - struct nv30_fragment_program *fragprog; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - struct nv30_rasterizer_state *rasterizer; - struct nv30_zsa_state *zsa; - struct nv30_blend_state *blend; - struct pipe_blend_color blend_colour; - struct pipe_stencil_ref stencil_ref; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_buffer *idxbuf; - unsigned idxbuf_format; - struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned nr_samplers; - unsigned nr_textures; - unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - unsigned vtxbuf_nr; - struct nv30_vtxelt_state *vtxelt; -}; - -static INLINE struct nv30_context * -nv30_context(struct pipe_context *pipe) -{ - return (struct nv30_context *)pipe; -} - -struct nv30_state_entry { - boolean (*validate)(struct nv30_context *nv30); - struct { - unsigned pipe; - unsigned hw; - } dirty; -}; - -extern void nv30_init_state_functions(struct nv30_context *nv30); -extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_query_functions(struct nv30_context *nv30); - -extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); - -/* nv30_draw.c */ -extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); - -/* nv30_vertprog.c */ -extern void nv30_vertprog_destroy(struct nv30_context *, - struct nv30_vertex_program *); - -/* nv30_fragprog.c */ -extern void nv30_fragprog_destroy(struct nv30_context *, - struct nv30_fragment_program *); - -/* nv30_fragtex.c */ -extern void nv30_fragtex_bind(struct nv30_context *); - -/* nv30_state.c and friends */ -extern boolean nv30_state_validate(struct nv30_context *nv30); -extern void nv30_state_emit(struct nv30_context *nv30); -extern void nv30_state_flush_notify(struct nouveau_channel *chan); -extern struct nv30_state_entry nv30_state_rasterizer; -extern struct nv30_state_entry nv30_state_scissor; -extern struct nv30_state_entry nv30_state_stipple; -extern struct nv30_state_entry nv30_state_fragprog; -extern struct nv30_state_entry nv30_state_vertprog; -extern struct nv30_state_entry nv30_state_blend; -extern struct nv30_state_entry nv30_state_blend_colour; -extern struct nv30_state_entry nv30_state_zsa; -extern struct nv30_state_entry nv30_state_viewport; -extern struct nv30_state_entry nv30_state_framebuffer; -extern struct nv30_state_entry nv30_state_fragtex; -extern struct nv30_state_entry nv30_state_vbo; -extern struct nv30_state_entry nv30_state_sr; - -/* nv30_vbo.c */ -extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv30_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv30_clear.c */ -extern void nv30_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - -/* nv30_context.c */ -struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv); - -#endif diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c deleted file mode 100644 index 74fc138c05..0000000000 --- a/src/gallium/drivers/nv30/nv30_draw.c +++ /dev/null @@ -1,61 +0,0 @@ -#include "draw/draw_pipe.h" - -#include "nv30_context.h" - -struct nv30_draw_stage { - struct draw_stage draw; - struct nv30_context *nv30; -}; - -static void -nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_flush(struct draw_stage *draw, unsigned flags) -{ -} - -static void -nv30_draw_reset_stipple_counter(struct draw_stage *draw) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_destroy(struct draw_stage *draw) -{ - FREE(draw); -} - -struct draw_stage * -nv30_draw_render_stage(struct nv30_context *nv30) -{ - struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); - - nv30draw->nv30 = nv30; - nv30draw->draw.draw = nv30->draw; - nv30draw->draw.point = nv30_draw_point; - nv30draw->draw.line = nv30_draw_line; - nv30draw->draw.tri = nv30_draw_tri; - nv30draw->draw.flush = nv30_draw_flush; - nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; - nv30draw->draw.destroy = nv30_draw_destroy; - - return &nv30draw->draw; -} - diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c deleted file mode 100644 index 2c432c6dfa..0000000000 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ /dev/null @@ -1,905 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv30_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV30_FP_OP_COND_TR -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) -#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) - -#define MAX_CONSTS 128 -#define MAX_IMM 32 -struct nv30_fpc { - struct nv30_fragment_program *fp; - - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - int high_temp; - int temp_temp_count; - int num_regs; - - uint depth_id; - uint colour_id; - - unsigned inst_offset; - - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; - - struct nv30_sreg imm[MAX_IMM]; - unsigned nr_imm; -}; - -static INLINE struct nv30_sreg -temp(struct nv30_fpc *fpc) -{ - int idx; - - idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); -} - -static INLINE struct nv30_sreg -constant(struct nv30_fpc *fpc, int pipe, float vals[4]) -{ - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; - - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv30_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - -static void -grow_insns(struct nv30_fpc *fpc, int size) -{ - struct nv30_fragment_program *fp = fpc->fp; - - fp->insn_len += size; - fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); -} - -static void -emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_INPUT: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); - break; - case NV30SR_OUTPUT: - sr |= NV30_FP_REG_SRC_HALF; - /* fall-through */ - case NV30SR_TEMP: - sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV30_FP_REG_SRC_SHIFT); - break; - case NV30SR_CONST: - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { - struct nv30_fragment_program_data *fpd; - - fp->consts = realloc(fp->consts, ++fp->nr_consts * - sizeof(*fpd)); - fpd = &fp->consts[fp->nr_consts - 1]; - fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; - memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); - } - - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); - break; - case NV30SR_NONE: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_FP_REG_NEGATE; - - if (src.abs) - hw[1] |= (1 << (29 + pos)); - - sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); - - hw[pos + 1] |= sr; -} - -static void -emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - - switch (dst.type) { - case NV30SR_TEMP: - if (fpc->num_regs < (dst.index + 1)) - fpc->num_regs = dst.index + 1; - break; - case NV30SR_OUTPUT: - if (dst.index == 1) { - fp->fp_control |= 0xe; - } else { - hw[0] |= NV30_FP_OP_OUT_REG_HALF; - } - break; - case NV30SR_NONE: - hw[0] |= (1 << 30); - break; - default: - assert(0); - } - - hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); -} - -static void -nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw; - - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - memset(hw, 0, sizeof(uint32_t) * 4); - - if (op == NV30_FP_OP_OPCODE_KIL) - fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); - - if (sat) - hw[0] |= NV30_FP_OP_OUT_SAT; - - if (dst.cc_update) - hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); -} - -static void -nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) -{ - struct nv30_fragment_program *fp = fpc->fp; - - nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - - fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); -} - -static INLINE struct nv30_sreg -tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) -{ - struct nv30_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); - break; - case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->Register.Index, NULL); - break; - case TGSI_FILE_IMMEDIATE: - assert(fsrc->Register.Index < fpc->nr_imm); - src = fpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); - if (fpc->high_temp < src.index) - fpc->high_temp = src.index; - break; - /* This is clearly insane, but gallium hands us shaders like this. - * Luckily fragprog results are just temp regs.. - */ - case TGSI_FILE_OUTPUT: - if (fsrc->Register.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); - else - return nv30_sr(NV30SR_OUTPUT, 1); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nv30_sreg -tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - int idx; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - if (fdst->Register.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); - else - return nv30_sr(NV30SR_OUTPUT, 1); - break; - case TGSI_FILE_TEMPORARY: - idx = fdst->Register.Index + 1; - if (fpc->high_temp < idx) - fpc->high_temp = idx; - return nv30_sr(NV30SR_TEMP, idx); - case TGSI_FILE_NULL: - return nv30_sr(NV30SR_NONE, 0); - default: - NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nv30_sr(NV30SR_NONE, 0); - } -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv30_sreg *src) -{ - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, - const struct tgsi_full_instruction *finst) -{ - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg src[3], dst, tmp; - int mask, sat, unit = 0; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - fpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(fpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - NOUVEAU_MSG("extra src attr %d\n", - fsrc->Register.Index); - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - case TGSI_FILE_SAMPLER: - unit = fsrc->Register.Index; - break; - case TGSI_FILE_OUTPUT: - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(fpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_CMP: - tmp = nv30_sr(NV30SR_NONE, 0); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV30_VP_INST_COND_GE; - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NV30_VP_INST_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); - break; - case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); - break; - case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_KILP: - arith(fpc, 0, KIL, none, 0, none, none, none); - break; - case TGSI_OPCODE_KIL: - dst = nv30_sr(NV30SR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); - break; - case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); - break; -// case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LRP: - arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - arith(fpc, sat, POW, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); - break; - case TGSI_OPCODE_RFL: - arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_RSQ: - arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); - break; - case TGSI_OPCODE_SCS: - /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) - { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - } - else - { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - } - break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); - break; - case TGSI_OPCODE_TEX: - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXP: - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->Range.First; - break; - case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->Range.First; - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_prepare(struct nv30_fpc *fpc) -{ - struct tgsi_parse_context p; - /*int high_temp = -1, i;*/ - - tgsi_parse_init(&p, fpc->fp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &p.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_OUTPUT: - if (!nv30_fragprog_parse_decl_output(fpc, fdec)) - goto out_err; - break; - /*case TGSI_FILE_TEMPORARY: - if (fdec->Range.Last > high_temp) { - high_temp = - fdec->Range.Last; - } - break;*/ - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm; - float vals[4]; - - imm = &p.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); - - vals[0] = imm->u[0].Float; - vals[1] = imm->u[1].Float; - vals[2] = imm->u[2].Float; - vals[3] = imm->u[3].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } - break; - default: - break; - } - } - tgsi_parse_free(&p); - - /*if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); - for (i = 0; i < high_temp; i++) - fpc->r_temp[i] = temp(fpc); - fpc->r_temps_discard = 0; - }*/ - - return TRUE; - -out_err: - /*if (fpc->r_temp) - FREE(fpc->r_temp);*/ - tgsi_parse_free(&p); - return FALSE; -} - -static void -nv30_fragprog_translate(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - struct tgsi_parse_context parse; - struct nv30_fpc *fpc = NULL; - - tgsi_dump(fp->pipe.tokens,0); - - fpc = CALLOC(1, sizeof(struct nv30_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; - - if (!nv30_fragprog_prepare(fpc)) { - FREE(fpc); - return; - } - - tgsi_parse_init(&parse, fp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - - finst = &parse.FullToken.FullInstruction; - if (!nv30_fragprog_parse_instruction(fpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - fp->fp_control |= (fpc->num_regs-1)/2; - fp->fp_reg_control = (1<<16)|0x4; - - /* Terminate final instruction */ - fp->insn[fpc->inst_offset] |= 0x00000001; - - /* Append NOP + END instruction, may or may not be necessary. */ - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - fp->insn[fpc->inst_offset + 0] = 0x00000001; - fp->insn[fpc->inst_offset + 1] = 0x00000000; - fp->insn[fpc->inst_offset + 2] = 0x00000000; - fp->insn[fpc->inst_offset + 3] = 0x00000000; - - fp->translated = TRUE; - fp->on_hw = FALSE; -out_err: - tgsi_parse_free(&parse); - FREE(fpc); -} - -static void -nv30_fragprog_upload(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - struct pipe_screen *pscreen = nv30->pipe.screen; - const uint32_t le = 1; - uint32_t *map; - int i; - - map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - fflush(stdout); fflush(stderr); - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - fflush(stdout); fflush(stderr); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - pipe_buffer_unmap(pscreen, fp->buffer); -} - -static boolean -nv30_fragprog_validate(struct nv30_context *nv30) -{ - struct nv30_fragment_program *fp = nv30->fragprog; - struct pipe_buffer *constbuf = - nv30->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_stateobj *so; - boolean new_consts = FALSE; - int i; - - if (fp->translated) - goto update_constants; - - /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ - nv30_fragprog_translate(nv30, fp); - if (!fp->translated) { - /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ - return FALSE; - } - - fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv30_fragprog_upload(nv30, fp); - - so = so_new(4, 4, 1); - so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); - so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); - so_data (so, fp->fp_reg_control); - so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); - so_data (so, fp->samplers); - so_ref(so, &fp->so); - so_ref(NULL, &so); - -update_constants: - if (fp->nr_consts) { - float *map; - - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nv30_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; - - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - new_consts = TRUE; - } - pipe_buffer_unmap(pscreen, constbuf); - - if (new_consts) - nv30_fragprog_upload(nv30, fp); - } - - if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { - so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_fragprog_destroy(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - if (fp->buffer) - pipe_buffer_reference(&fp->buffer, NULL); - - if (fp->so) - so_ref(NULL, &fp->so); - - if (fp->insn_len) - FREE(fp->insn); -} - -struct nv30_state_entry nv30_state_fragprog = { - .validate = nv30_fragprog_validate, - .dirty = { - .pipe = NV30_NEW_FRAGPROG, - .hw = NV30_STATE_FRAGPROG - } -}; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c deleted file mode 100644 index bfa27b632f..0000000000 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ /dev/null @@ -1,240 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" - -#include "nv30_context.h" -#include "../nouveau/nv04_surface_2d.h" - -static void -nv30_miptree_layout(struct nv30_miptree *nv30mt) -{ - struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_RENDER_TARGET | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_SCANOUT); - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth0; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); - else - nv30mt->level[l].pitch = util_format_get_stride(pt->format, width); - - nv30mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = u_minify(width, 1); - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l < pt->last_level; l++) { - nv30mt->level[l].image_offset[f] = offset; - - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) - offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64); - else - offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv30mt->total_size = offset; -} - -static struct pipe_texture * -nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) -{ - struct nv30_miptree *mt; - unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE; - - mt = MALLOC(sizeof(struct nv30_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - - /* Swizzled textures must be POT */ - if (pt->width0 & (pt->width0 - 1) || - pt->height0 & (pt->height0 - 1)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else { - switch (pt->format) { - /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - { - if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - break; - } - default: - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - } - } - - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - - /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. - * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. - * This also happens for small mipmaps of large textures. */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - nv30_miptree_layout(mt); - - mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, - mt->total_size); - if (!mt->buffer) { - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - - return &mt->base; -} - -static struct pipe_texture * -nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nv30_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nv30_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - /* Assume whoever created this buffer expects it to be linear for now */ - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static void -nv30_miptree_destroy(struct pipe_texture *pt) -{ - struct nv30_miptree *mt = (struct nv30_miptree *)pt; - int l; - - pipe_buffer_reference(&mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - - FREE(mt); -} - -static struct pipe_surface * -nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nv30mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = nv30mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ns->base.offset = nv30mt->level[level].image_offset[zslice]; - } else { - ns->base.offset = nv30mt->level[level].image_offset[0]; - } - - /* create a linear temporary that we can render into if necessary. - * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so - * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ - if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; - - return &ns->base; -} - -static void -nv30_miptree_surface_del(struct pipe_surface *ps) -{ - struct nv04_surface* ns = (struct nv04_surface*)ps; - if(ns->backing) - { - struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) - screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv30_miptree_surface_del(&ns->backing->base); - } - - pipe_texture_reference(&ps->texture, NULL); - FREE(ps); -} - -void -nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv30_miptree_create; - pscreen->texture_destroy = nv30_miptree_destroy; - pscreen->get_tex_surface = nv30_miptree_surface_new; - pscreen->tex_surface_destroy = nv30_miptree_surface_del; - - nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket; -} diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c deleted file mode 100644 index e27e9ccbf6..0000000000 --- a/src/gallium/drivers/nv30/nv30_query.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv30_context.h" - -struct nv30_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv30_query * -nv30_query(struct pipe_query *pipe) -{ - return (struct nv30_query *)pipe; -} - -static struct pipe_query * -nv30_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv30_query *q; - - q = CALLOC(1, sizeof(struct nv30_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_query *q = nv30_query(pq); - - if (q->object) - nouveau_resource_free(&q->object); - FREE(q); -} - -static void -nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - /* Happens when end_query() is called, then another begin_query() - * without querying the result in-between. For now we'll wait for - * the existing query to notify completion, but it could be better. - */ - if (q->object) { - uint64_t tmp; - pipe->get_query_result(pipe, pq, 1, &tmp); - } - - if (nouveau_resource_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) - assert(0); - nouveau_notifier_reset(nv30->screen->query, q->object->start); - - BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); - - q->ready = FALSE; -} - -static void -nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - struct nv30_query *q = nv30_query(pq); - - BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(chan); -} - -static boolean -nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nouveau_notifier_status(nv30->screen->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - - nouveau_notifier_wait_status(nv30->screen->query, - q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, 0); - } - - q->result = nouveau_notifier_return_val(nv30->screen->query, - q->object->start); - q->ready = TRUE; - nouveau_resource_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv30_init_query_functions(struct nv30_context *nv30) -{ - nv30->pipe.create_query = nv30_query_create; - nv30->pipe.destroy_query = nv30_query_destroy; - nv30->pipe.begin_query = nv30_query_begin; - nv30->pipe.end_query = nv30_query_end; - nv30->pipe.get_query_result = nv30_query_result; -} diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h deleted file mode 100644 index 8591cd31ca..0000000000 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __NV30_SCREEN_H__ -#define __NV30_SCREEN_H__ - -#include "nouveau/nouveau_screen.h" - -#include "nouveau/nv04_surface_2d.h" - -struct nv30_screen { - struct nouveau_screen base; - - struct nouveau_winsys *nvws; - - struct nv30_context *cur_ctx; - - /* HW graphics objects */ - struct nv04_surface_2d *eng2d; - struct nouveau_grobj *rankine; - struct nouveau_notifier *sync; - - /* Query object resources */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; - - /* Vtxprog resources */ - struct nouveau_resource *vp_exec_heap; - struct nouveau_resource *vp_data_heap; - - /* Current 3D state of channel */ - struct nouveau_stateobj *state[NV30_STATE_MAX]; -}; - -static INLINE struct nv30_screen * -nv30_screen(struct pipe_screen *screen) -{ - return (struct nv30_screen *)screen; -} - -void -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen); - -#endif diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h deleted file mode 100644 index dd3a36f78f..0000000000 --- a/src/gallium/drivers/nv30/nv30_shader.h +++ /dev/null @@ -1,490 +0,0 @@ -#ifndef __NV30_SHADER_H__ -#define __NV30_SHADER_H__ - -/* Vertex programs instruction set - * - * 128bit opcodes, split into 4 32-bit ones for ease of use. - * - * Non-native instructions - * ABS - MOV + NV40_VP_INST0_DEST_ABS - * POW - EX2 + MUL + LG2 - * SUB - ADD, second source negated - * SWZ - MOV - * XPD - - * - * Register access - * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) - * - Only one CONST can be accessed per-instruction (move extras into TEMPs) - * - * Relative Addressing - * According to the value returned for - * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB - * - * there are only two address registers available. The destination in the - * ARL instruction is set to TEMP <n> (The temp isn't actually written). - * - * When using vanilla ARB_v_p, the proprietary driver will squish both the - * available ADDRESS regs into the first hardware reg in the X and Y - * components. - * - * To use an address reg as an index into consts, the CONST_SRC is set to - * (const_base + offset) and INDEX_CONST is set. - * - * To access the second address reg use ADDR_REG_SELECT_1. A particular - * component of the address regs is selected with ADDR_SWZ. - * - * Only one address register can be accessed per instruction. - * - * Conditional execution (see NV_vertex_program{2,3} for details) Conditional - * execution of an instruction is enabled by setting COND_TEST_ENABLE, and - * selecting the condition which will allow the test to pass with - * COND_{FL,LT,...}. It is possible to swizzle the values in the condition - * register, which allows for testing against an individual component. - * - * Branching: - * - * The BRA/CAL instructions seem to follow a slightly different opcode - * layout. The destination instruction ID (IADDR) overlaps a source field. - * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO - * command, and is incremented automatically on each UPLOAD_INST FIFO - * command. - * - * Conditional branching is achieved by using the condition tests described - * above. There doesn't appear to be dedicated looping instructions, but - * this can be done using a temp reg + conditional branching. - * - * Subroutines may be uploaded before the main program itself, but the first - * executed instruction is determined by the PROGRAM_START_ID FIFO command. - * - */ - -/* DWORD 0 */ - -#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ -#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ -#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ -#define NV30_VP_INST_VEC_RESULT (1 << 20) -#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 -#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) -#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) -#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) -#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) -#define NV30_VP_INST_COND_SHIFT 11 -#define NV30_VP_INST_COND_MASK (0x07 << 11) -# define NV30_VP_INST_COND_FL 0 /* guess */ -# define NV30_VP_INST_COND_LT 1 -# define NV30_VP_INST_COND_EQ 2 -# define NV30_VP_INST_COND_LE 3 -# define NV30_VP_INST_COND_GT 4 -# define NV30_VP_INST_COND_NE 5 -# define NV30_VP_INST_COND_GE 6 -# define NV30_VP_INST_COND_TR 7 /* guess */ -#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 -#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) -#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 -#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) -#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 -#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) -#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) -#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) -#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 -#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) -#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 -#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) - -/* DWORD 1 */ -#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 -#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) -# define NV30_VP_INST_OP_NOP 0x00 -# define NV30_VP_INST_OP_RCP 0x02 -# define NV30_VP_INST_OP_RCC 0x03 -# define NV30_VP_INST_OP_RSQ 0x04 -# define NV30_VP_INST_OP_EXP 0x05 -# define NV30_VP_INST_OP_LOG 0x06 -# define NV30_VP_INST_OP_LIT 0x07 -# define NV30_VP_INST_OP_BRA 0x09 -# define NV30_VP_INST_OP_CAL 0x0B -# define NV30_VP_INST_OP_RET 0x0C -# define NV30_VP_INST_OP_LG2 0x0D -# define NV30_VP_INST_OP_EX2 0x0E -# define NV30_VP_INST_OP_SIN 0x0F -# define NV30_VP_INST_OP_COS 0x10 -#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 -#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) -# define NV30_VP_INST_OP_NOPV 0x00 -# define NV30_VP_INST_OP_MOV 0x01 -# define NV30_VP_INST_OP_MUL 0x02 -# define NV30_VP_INST_OP_ADD 0x03 -# define NV30_VP_INST_OP_MAD 0x04 -# define NV30_VP_INST_OP_DP3 0x05 -# define NV30_VP_INST_OP_DP4 0x07 -# define NV30_VP_INST_OP_DPH 0x06 -# define NV30_VP_INST_OP_DST 0x08 -# define NV30_VP_INST_OP_MIN 0x09 -# define NV30_VP_INST_OP_MAX 0x0A -# define NV30_VP_INST_OP_SLT 0x0B -# define NV30_VP_INST_OP_SGE 0x0C -# define NV30_VP_INST_OP_ARL 0x0D -# define NV30_VP_INST_OP_FRC 0x0E -# define NV30_VP_INST_OP_FLR 0x0F -# define NV30_VP_INST_OP_SEQ 0x10 -# define NV30_VP_INST_OP_SFL 0x11 -# define NV30_VP_INST_OP_SGT 0x12 -# define NV30_VP_INST_OP_SLE 0x13 -# define NV30_VP_INST_OP_SNE 0x14 -# define NV30_VP_INST_OP_STR 0x15 -# define NV30_VP_INST_OP_SSG 0x16 -# define NV30_VP_INST_OP_ARR 0x17 -# define NV30_VP_INST_OP_ARA 0x18 -#define NV30_VP_INST_CONST_SRC_SHIFT 14 -#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) -#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ -#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ -# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ -# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ -# define NV30_VP_INST_IN_NORMAL 2 -# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ -# define NV30_VP_INST_IN_COL1 4 -# define NV30_VP_INST_IN_FOGC 5 -# define NV30_VP_INST_IN_TC0 8 -# define NV30_VP_INST_IN_TC(n) (8+n) -#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ - -/* Please note: the IADDR fields overlap other fields because they are used - * only for branch instructions. See Branching: label above - * - * DWORD 2 - */ -#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ -#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ -#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ -#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ -#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ -#define NV30_VP_INST_IADDR_SHIFT 2 -#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ - -/* DWORD 3 */ -#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ -#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ -#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 -#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) -#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 -#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) -#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 -#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) -#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ -#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ -#define NV30_VP_INST_DEST_SHIFT 2 -#define NV30_VP_INST_DEST_MASK (0x0F << 2) -# define NV30_VP_INST_DEST_POS 0 -# define NV30_VP_INST_DEST_BFC0 1 -# define NV30_VP_INST_DEST_BFC1 2 -# define NV30_VP_INST_DEST_COL0 3 -# define NV30_VP_INST_DEST_COL1 4 -# define NV30_VP_INST_DEST_FOGC 5 -# define NV30_VP_INST_DEST_PSZ 6 -# define NV30_VP_INST_DEST_TC(n) (8+n) - -#define NV30_VP_INST_LAST (1 << 0) - -/* Useful to split the source selection regs into their pieces */ -#define NV30_VP_SRC0_HIGH_SHIFT 6 -#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 -#define NV30_VP_SRC0_LOW_MASK 0x0000003F -#define NV30_VP_SRC2_HIGH_SHIFT 4 -#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 -#define NV30_VP_SRC2_LOW_MASK 0x0000000F - - -/* Source-register definition - matches NV20 exactly */ -#define NV30_VP_SRC_NEGATE (1<<14) -#define NV30_VP_SRC_SWZ_X_SHIFT 12 -#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) -#define NV30_VP_SRC_SWZ_Y_SHIFT 10 -#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) -#define NV30_VP_SRC_SWZ_Z_SHIFT 8 -#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) -#define NV30_VP_SRC_SWZ_W_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) -#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) -#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) -#define NV30_VP_SRC_REG_TYPE_SHIFT 0 -#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) -#define NV30_VP_SRC_REG_TYPE_TEMP 1 -#define NV30_VP_SRC_REG_TYPE_INPUT 2 -#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ - -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 - * otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO - * is implemented simply by not writing to the relevant components of the destination. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - */ - -//== Opcode / Destination selection == -#define NV30_FP_OP_PROGRAM_END (1 << 0) -#define NV30_FP_OP_OUT_REG_SHIFT 1 -#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ -/* Needs to be set when writing outputs to get expected result.. */ -#define NV30_FP_OP_OUT_REG_HALF (1 << 7) -#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV30_FP_OP_OUTMASK_SHIFT 9 -#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV30_FP_OP_OUT_X (1<<9) -# define NV30_FP_OP_OUT_Y (1<<10) -# define NV30_FP_OP_OUT_Z (1<<11) -# define NV30_FP_OP_OUT_W (1<<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV30_FP_OP_INPUT_SRC_SHIFT 13 -#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV30_FP_OP_INPUT_SRC_COL0 0x1 -# define NV30_FP_OP_INPUT_SRC_COL1 0x2 -# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV30_FP_OP_INPUT_SRC_TC0 0x4 -# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -#define NV30_FP_OP_TEX_UNIT_SHIFT 17 -#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ -#define NV30_FP_OP_PRECISION_SHIFT 22 -#define NV30_FP_OP_PRECISION_MASK (3 << 22) -# define NV30_FP_PRECISION_FP32 0 -# define NV30_FP_PRECISION_FP16 1 -# define NV30_FP_PRECISION_FX12 2 -#define NV30_FP_OP_OPCODE_SHIFT 24 -#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV30_FP_OP_OPCODE_NOP 0x00 -# define NV30_FP_OP_OPCODE_MOV 0x01 -# define NV30_FP_OP_OPCODE_MUL 0x02 -# define NV30_FP_OP_OPCODE_ADD 0x03 -# define NV30_FP_OP_OPCODE_MAD 0x04 -# define NV30_FP_OP_OPCODE_DP3 0x05 -# define NV30_FP_OP_OPCODE_DP4 0x06 -# define NV30_FP_OP_OPCODE_DST 0x07 -# define NV30_FP_OP_OPCODE_MIN 0x08 -# define NV30_FP_OP_OPCODE_MAX 0x09 -# define NV30_FP_OP_OPCODE_SLT 0x0A -# define NV30_FP_OP_OPCODE_SGE 0x0B -# define NV30_FP_OP_OPCODE_SLE 0x0C -# define NV30_FP_OP_OPCODE_SGT 0x0D -# define NV30_FP_OP_OPCODE_SNE 0x0E -# define NV30_FP_OP_OPCODE_SEQ 0x0F -# define NV30_FP_OP_OPCODE_FRC 0x10 -# define NV30_FP_OP_OPCODE_FLR 0x11 -# define NV30_FP_OP_OPCODE_KIL 0x12 -# define NV30_FP_OP_OPCODE_PK4B 0x13 -# define NV30_FP_OP_OPCODE_UP4B 0x14 -# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ -# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ -# define NV30_FP_OP_OPCODE_TEX 0x17 -# define NV30_FP_OP_OPCODE_TXP 0x18 -# define NV30_FP_OP_OPCODE_TXD 0x19 -# define NV30_FP_OP_OPCODE_RCP 0x1A -# define NV30_FP_OP_OPCODE_RSQ 0x1B -# define NV30_FP_OP_OPCODE_EX2 0x1C -# define NV30_FP_OP_OPCODE_LG2 0x1D -# define NV30_FP_OP_OPCODE_LIT 0x1E -# define NV30_FP_OP_OPCODE_LRP 0x1F -# define NV30_FP_OP_OPCODE_STR 0x20 -# define NV30_FP_OP_OPCODE_SFL 0x21 -# define NV30_FP_OP_OPCODE_COS 0x22 -# define NV30_FP_OP_OPCODE_SIN 0x23 -# define NV30_FP_OP_OPCODE_PK2H 0x24 -# define NV30_FP_OP_OPCODE_UP2H 0x25 -# define NV30_FP_OP_OPCODE_POW 0x26 -# define NV30_FP_OP_OPCODE_PK4UB 0x27 -# define NV30_FP_OP_OPCODE_UP4UB 0x28 -# define NV30_FP_OP_OPCODE_PK2US 0x29 -# define NV30_FP_OP_OPCODE_UP2US 0x2A -# define NV30_FP_OP_OPCODE_DP2A 0x2E -# define NV30_FP_OP_OPCODE_TXB 0x31 -# define NV30_FP_OP_OPCODE_RFL 0x36 -# define NV30_FP_OP_OPCODE_DIV 0x3A -#define NV30_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV30_FP_OP_OUT_ABS (1 << 29) -#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV30_FP_OP_COND_SHIFT 18 -#define NV30_FP_OP_COND_MASK (0x07 << 18) -# define NV30_FP_OP_COND_FL 0 -# define NV30_FP_OP_COND_LT 1 -# define NV30_FP_OP_COND_EQ 2 -# define NV30_FP_OP_COND_LE 3 -# define NV30_FP_OP_COND_GT 4 -# define NV30_FP_OP_COND_NE 5 -# define NV30_FP_OP_COND_GE 6 -# define NV30_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV30_FP_OP_DST_SCALE_SHIFT 28 -#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV30_FP_OP_DST_SCALE_1X 0 -#define NV30_FP_OP_DST_SCALE_2X 1 -#define NV30_FP_OP_DST_SCALE_4X 2 -#define NV30_FP_OP_DST_SCALE_8X 3 -#define NV30_FP_OP_DST_SCALE_INV_2X 5 -#define NV30_FP_OP_DST_SCALE_INV_4X 6 -#define NV30_FP_OP_DST_SCALE_INV_8X 7 - - -/* high order bits of SRC2 */ -#define NV30_FP_OP_INDEX_INPUT (1 << 30) - -//== Register selection == -#define NV30_FP_REG_TYPE_SHIFT 0 -#define NV30_FP_REG_TYPE_MASK (3 << 0) -# define NV30_FP_REG_TYPE_TEMP 0 -# define NV30_FP_REG_TYPE_INPUT 1 -# define NV30_FP_REG_TYPE_CONST 2 -#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ -#define NV30_FP_REG_SRC_MASK (31 << 2) -#define NV30_FP_REG_SRC_HALF (1 << 8) -#define NV30_FP_REG_SWZ_ALL_SHIFT 9 -#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV30_FP_REG_SWZ_X_SHIFT 9 -#define NV30_FP_REG_SWZ_X_MASK (3 << 9) -#define NV30_FP_REG_SWZ_Y_SHIFT 11 -#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV30_FP_REG_SWZ_Z_SHIFT 13 -#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV30_FP_REG_SWZ_W_SHIFT 15 -#define NV30_FP_REG_SWZ_W_MASK (3 << 15) -# define NV30_FP_SWIZZLE_X 0 -# define NV30_FP_SWIZZLE_Y 1 -# define NV30_FP_SWIZZLE_Z 2 -# define NV30_FP_SWIZZLE_W 3 -#define NV30_FP_REG_NEGATE (1 << 17) - -#define NV30SR_NONE 0 -#define NV30SR_OUTPUT 1 -#define NV30SR_INPUT 2 -#define NV30SR_TEMP 3 -#define NV30SR_CONST 4 - -struct nv30_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv30_sreg -nv30_sr(int type, int index) -{ - struct nv30_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv30_sreg -nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) -{ - struct nv30_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv30_sreg -nv30_sr_neg(struct nv30_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_abs(struct nv30_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_scale(struct nv30_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} - -#endif diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c deleted file mode 100644 index 24b15a63ac..0000000000 --- a/src/gallium/drivers/nv30/nv30_state.c +++ /dev/null @@ -1,750 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -static void * -nv30_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); - - if (cso->rt[0].blend_enable) { - so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | - nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rt[0].rgb_dst_factor)); - /* FIXME: Gallium assumes GL_EXT_blend_func_separate. - It is not the case for NV30 */ - so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); - } else { - so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, rankine, NV34TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - so_ref(so, &bso->so); - so_ref(NULL, &so); - bso->pipe = *cso; - return (void *)bso; -} - -static void -nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->blend = hwcso; - nv30->dirty |= NV30_NEW_BLEND; -} - -static void -nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_blend_state *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; -/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; - break;*/ - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -static void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv30_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nv30_sampler_state)); - - ps->fmt = 0; - /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format - are the tx format to use. We should store normalized coord flag - in sampler state structure, and set appropriate format in - nvxx_fragtex_build() - */ - /*NV34TCL_TX_FORMAT_RECT*/ - /*if (!cso->normalized_coords) { - ps->fmt |= (1<<14) ; - }*/ - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - - if (cso->max_anisotropy >= 8) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else - if (cso->max_anisotropy >= 2) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; - } - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - -static void -nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nv30->tex_sampler[unit] = sampler[unit]; - nv30->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv30->nr_samplers; unit++) { - nv30->tex_sampler[unit] = NULL; - nv30->dirty_samplers |= (1 << unit); - } - - nv30->nr_samplers = nr; - nv30->dirty |= NV30_NEW_SAMPLER; -} - -static void -nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv30->tex_miptree[unit], miptree[unit]); - nv30->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv30->nr_textures; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv30->tex_miptree[unit], NULL); - nv30->dirty_samplers |= (1 << unit); - } - - nv30->nr_textures = nr; - nv30->dirty |= NV30_NEW_SAMPLER; -} - -static void * -nv30_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *rankine = nv30->screen->rankine; - - /*XXX: ignored: - * light_twoside - * point_smooth -nohw - * multisample - */ - - so_method(so, rankine, NV34TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : - NV34TCL_SHADE_MODEL_SMOOTH); - - so_method(so, rankine, NV34TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, rankine, NV34TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); - else - so_data(so, 0); - if (cso->offset_cw || cso->offset_ccw) { - so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); - } - - so_method(so, rankine, NV34TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - so_ref(so, &rsso->so); - so_ref(NULL, &so); - rsso->pipe = *cso; - return (void *)rsso; -} - -static void -nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->rasterizer = hwcso; - nv30->dirty |= NV30_NEW_RAST; - /*nv30->draw_dirty |= NV30_NEW_RAST;*/ -} - -static void -nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_rasterizer_state *rsso = hwcso; - - so_ref(NULL, &rsso->so); - FREE(rsso); -} - -static void * -nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *rankine = nv30->screen->rankine; - - so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); - - if (cso->stencil[0].enabled) { - so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, rankine, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, rankine, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &zsaso->so); - so_ref(NULL, &so); - zsaso->pipe = *cso; - return (void *)zsaso; -} - -static void -nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->zsa = hwcso; - nv30->dirty |= NV30_NEW_ZSA; -} - -static void -nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_zsa_state *zsaso = hwcso; - - so_ref(NULL, &zsaso->so); - FREE(zsaso); -} - -static void * -nv30_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - /*struct nv30_context *nv30 = nv30_context(pipe);*/ - struct nv30_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv30_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ - - return (void *)vp; -} - -static void -nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->vertprog = hwcso; - nv30->dirty |= NV30_NEW_VERTPROG; - /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/ -} - -static void -nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_vertex_program *vp = hwcso; - - /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ - nv30_vertprog_destroy(nv30, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nv30_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv30_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv30_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->fragprog = hwcso; - nv30->dirty |= NV30_NEW_FRAGPROG; -} - -static void -nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_fragment_program *fp = hwcso; - - nv30_fragprog_destroy(nv30, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - -static void -nv30_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->blend_colour = *bcol; - nv30->dirty |= NV30_NEW_BCOL; -} - -static void -nv30_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->stencil_ref = *sr; - nv30->dirty |= NV30_NEW_SR; -} - -static void -nv30_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->constbuf[shader] = buf; - nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_VERTEX) { - nv30->dirty |= NV30_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv30->dirty |= NV30_NEW_FRAGPROG; - } -} - -static void -nv30_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->framebuffer = *fb; - nv30->dirty |= NV30_NEW_FB; -} - -static void -nv30_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - memcpy(nv30->stipple, stipple->stipple, 4 * 32); - nv30->dirty |= NV30_NEW_STIPPLE; -} - -static void -nv30_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->scissor = *s; - nv30->dirty |= NV30_NEW_SCISSOR; -} - -static void -nv30_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->viewport = *vpt; - nv30->dirty |= NV30_NEW_VIEWPORT; - /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/ -} - -static void -nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); - nv30->vtxbuf_nr = count; - - nv30->dirty |= NV30_NEW_ARRAYS; - /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ -} - -static void * -nv30_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct nv30_vtxelt_state *cso = CALLOC_STRUCT(nv30_vtxelt_state); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - -/* nv30_vtxelt_construct(cso);*/ - - return (void *)cso; -} - -static void -nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->vtxelt = hwcso; - nv30->dirty |= NV30_NEW_ARRAYS; - /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ -} - -void -nv30_init_state_functions(struct nv30_context *nv30) -{ - nv30->pipe.create_blend_state = nv30_blend_state_create; - nv30->pipe.bind_blend_state = nv30_blend_state_bind; - nv30->pipe.delete_blend_state = nv30_blend_state_delete; - - nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind; - nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture; - - nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; - nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; - nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; - - nv30->pipe.create_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_create; - nv30->pipe.bind_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_bind; - nv30->pipe.delete_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_delete; - - nv30->pipe.create_vs_state = nv30_vp_state_create; - nv30->pipe.bind_vs_state = nv30_vp_state_bind; - nv30->pipe.delete_vs_state = nv30_vp_state_delete; - - nv30->pipe.create_fs_state = nv30_fp_state_create; - nv30->pipe.bind_fs_state = nv30_fp_state_bind; - nv30->pipe.delete_fs_state = nv30_fp_state_delete; - - nv30->pipe.set_blend_color = nv30_set_blend_color; - nv30->pipe.set_stencil_ref = nv30_set_stencil_ref; - nv30->pipe.set_clip_state = nv30_set_clip_state; - nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; - nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; - nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; - nv30->pipe.set_scissor_state = nv30_set_scissor_state; - nv30->pipe.set_viewport_state = nv30_set_viewport_state; - - nv30->pipe.create_vertex_elements_state = nv30_vtxelts_state_create; - nv30->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete; - nv30->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind; - - nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; -} - diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h deleted file mode 100644 index 66c26360cb..0000000000 --- a/src/gallium/drivers/nv30/nv30_state.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef __NV30_STATE_H__ -#define __NV30_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" - -struct nv30_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv30_vertex_program_exec { - uint32_t data[4]; - boolean has_branch_offset; - int const_index; -}; - -struct nv30_vertex_program_data { - int index; /* immediates == -1 */ - float value[4]; -}; - -struct nv30_vertex_program { - struct pipe_shader_state pipe; - - boolean translated; - - struct nv30_vertex_program_exec *insns; - unsigned nr_insns; - struct nv30_vertex_program_data *consts; - unsigned nr_consts; - - struct nouveau_resource *exec; - unsigned exec_start; - struct nouveau_resource *data; - unsigned data_start; - unsigned data_start_min; - - uint32_t ir; - uint32_t or; - struct nouveau_stateobj *so; -}; - -struct nv30_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv30_fragment_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int insn_len; - - struct nv30_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - uint32_t fp_reg_control; - struct nouveau_stateobj *so; -}; - -#define NV30_MAX_TEXTURE_LEVELS 16 - -struct nv30_miptree { - struct pipe_texture base; - struct nouveau_bo *bo; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[NV30_MAX_TEXTURE_LEVELS]; -}; - -#endif diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c deleted file mode 100644 index c36d58c040..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_blend_validate(struct nv30_context *nv30) -{ - so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]); - return TRUE; -} - -struct nv30_state_entry nv30_state_blend = { - .validate = nv30_state_blend_validate, - .dirty = { - .pipe = NV30_NEW_BLEND, - .hw = NV30_STATE_BLEND - } -}; - -static boolean -nv30_state_blend_colour_validate(struct nv30_context *nv30) -{ - struct nouveau_stateobj *so = so_new(1, 1, 0); - struct pipe_blend_color *bcol = &nv30->blend_colour; - - so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_blend_colour = { - .validate = nv30_state_blend_colour_validate, - .dirty = { - .pipe = NV30_NEW_BCOL, - .hw = NV30_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c deleted file mode 100644 index deefe7fd8d..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "nv30_context.h" -#include "nv30_state.h" - -static struct nv30_state_entry *render_states[] = { - &nv30_state_framebuffer, - &nv30_state_rasterizer, - &nv30_state_scissor, - &nv30_state_stipple, - &nv30_state_fragprog, - &nv30_state_fragtex, - &nv30_state_vertprog, - &nv30_state_blend, - &nv30_state_blend_colour, - &nv30_state_zsa, - &nv30_state_sr, - &nv30_state_viewport, - &nv30_state_vbo, - NULL -}; - -static void -nv30_state_do_validate(struct nv30_context *nv30, - struct nv30_state_entry **states) -{ - while (*states) { - struct nv30_state_entry *e = *states; - - if (nv30->dirty & e->dirty.pipe) { - if (e->validate(nv30)) { - nv30->state.dirty |= (1ULL << e->dirty.hw); - } - } - - states++; - } - nv30->dirty = 0; -} - -void -nv30_state_emit(struct nv30_context *nv30) -{ - struct nouveau_channel *chan = nv30->screen->base.channel; - struct nv30_state *state = &nv30->state; - struct nv30_screen *screen = nv30->screen; - unsigned i; - uint64_t states; - - /* XXX: racy! - */ - if (nv30 != screen->cur_ctx) { - for (i = 0; i < NV30_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nv30; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nv30->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nv30->screen->state[i]); - states &= ~(1ULL << i); - } - - state->dirty = 0; -} - -void -nv30_state_flush_notify(struct nouveau_channel *chan) -{ - struct nv30_context *nv30 = chan->user_private; - struct nv30_state *state = &nv30->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NV30_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NV30_STATE_FRAGPROG]); - if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/) - so_emit_reloc_markers(chan, state->hw[NV30_STATE_VTXBUF]); -} - -boolean -nv30_state_validate(struct nv30_context *nv30) -{ -#if 0 - boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE; - - if (nv30->render_mode != HW) { - /* Don't even bother trying to go back to hw if none - * of the states that caused swtnl previously have changed. - */ - if ((nv30->fallback_swtnl & nv30->dirty) - != nv30->fallback_swtnl) - return FALSE; - - /* Attempt to go to hwtnl again */ - nv30->pipe.flush(&nv30->pipe, 0, NULL); - nv30->dirty |= (NV30_NEW_VIEWPORT | - NV30_NEW_VERTPROG | - NV30_NEW_ARRAYS); - nv30->render_mode = HW; - } -#endif - nv30_state_do_validate(nv30, render_states); -#if 0 - if (nv30->fallback_swtnl || nv30->fallback_swrast) - return FALSE; - - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); -#endif - return TRUE; -} diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c deleted file mode 100644 index f7fe9833c7..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ /dev/null @@ -1,173 +0,0 @@ -#include "nv30_context.h" -#include "nouveau/nouveau_util.h" - -static boolean -nv30_state_framebuffer_validate(struct nv30_context *nv30) -{ - struct pipe_framebuffer_state *fb = &nv30->framebuffer; - struct nouveau_channel *chan = nv30->screen->base.channel; - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct nv04_surface *rt[2], *zeta = NULL; - uint32_t rt_enable = 0, rt_format = 0; - int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(12, 18, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - struct nv30_miptree *nv30mt; - int colour_bits = 32, zeta_bits = 32; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { - assert(colour_format == fb->cbufs[i]->format); - } else { - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) - rt_enable |= NV34TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) { - /* Render to at least a colour buffer */ - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else if (fb->zsbuf) { - depth_only = 1; - - /* Render to depth buffer only */ - if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else { - return FALSE; - } - - switch (colour_format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_B5G6R5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - colour_bits = 16; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - zeta_bits = 16; - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (colour_bits > zeta_bits) { - return FALSE; - } - - if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) { - struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); - uint32_t pitch = rt0->pitch; - - if (zeta) { - pitch |= (zeta->pitch << 16); - } else { - pitch |= (pitch << 16); - } - - nv30mt = (struct nv30_miptree *) rt0->base.texture; - so_method(so, rankine, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2); - so_data (so, pitch); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - nv30mt = (struct nv30_miptree *)rt[1]->base.texture; - so_method(so, rankine, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, rankine, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt[1]->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch); - } - - if (zeta_format) { - nv30mt = (struct nv30_miptree *)zeta->base.texture; - so_method(so, rankine, NV34TCL_DMA_ZETA, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, rankine, NV34TCL_ZETA_OFFSET, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), zeta->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - /* TODO: allocate LMA depth buffer */ - } - - so_method(so, rankine, NV34TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, rankine, NV34TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, rankine, NV34TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, rankine, 0x1d88, 1); - so_data (so, (1 << 12) | h); - /* Wonder why this is needed, context should all be set to zero on init */ - so_method(so, rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); - so_data (so, 0); - - so_ref(so, &nv30->state.hw[NV30_STATE_FB]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_framebuffer = { - .validate = nv30_state_framebuffer_validate, - .dirty = { - .pipe = NV30_NEW_FB, - .hw = NV30_STATE_FB - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c deleted file mode 100644 index 6d1b60e043..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_rasterizer.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_rasterizer_validate(struct nv30_context *nv30) -{ - so_ref(nv30->rasterizer->so, - &nv30->state.hw[NV30_STATE_RAST]); - return TRUE; -} - -struct nv30_state_entry nv30_state_rasterizer = { - .validate = nv30_state_rasterizer_validate, - .dirty = { - .pipe = NV30_NEW_RAST, - .hw = NV30_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c deleted file mode 100644 index ba61a9e24a..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_scissor_validate(struct nv30_context *nv30) -{ - struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; - struct pipe_scissor_state *s = &nv30->scissor; - struct nouveau_stateobj *so; - - if (nv30->state.hw[NV30_STATE_SCISSOR] && - (rast->scissor == 0 && nv30->state.scissor_enabled == 0)) - return FALSE; - nv30->state.scissor_enabled = rast->scissor; - - so = so_new(1, 2, 0); - so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); - if (nv30->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_scissor = { - .validate = nv30_state_scissor_validate, - .dirty = { - .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST, - .hw = NV30_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c deleted file mode 100644 index ed520a4f43..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ /dev/null @@ -1,40 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_stipple_validate(struct nv30_context *nv30) -{ - struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct nouveau_stateobj *so; - - if (nv30->state.hw[NV30_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(2, 33, 0); - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nv30->stipple[i]); - } else { - so = so_new(1, 1, 0); - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_stipple = { - .validate = nv30_state_stipple_validate, - .dirty = { - .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST, - .hw = NV30_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c deleted file mode 100644 index 6fccd6b60e..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ /dev/null @@ -1,42 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_viewport_validate(struct nv30_context *nv30) -{ - struct pipe_viewport_state *vpt = &nv30->viewport; - struct nouveau_stateobj *so; - - if (nv30->state.hw[NV30_STATE_VIEWPORT] && - !(nv30->dirty & NV30_NEW_VIEWPORT)) - return FALSE; - - so = so_new(3, 10, 0); - so_method(so, nv30->screen->rankine, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); -/* so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 1); -*/ - /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ - so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 1); - - so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_viewport = { - .validate = nv30_state_viewport_validate, - .dirty = { - .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST, - .hw = NV30_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c deleted file mode 100644 index 88cd74f180..0000000000 --- a/src/gallium/drivers/nv30/nv30_state_zsa.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_zsa_validate(struct nv30_context *nv30) -{ - so_ref(nv30->zsa->so, - &nv30->state.hw[NV30_STATE_ZSA]); - return TRUE; -} - -struct nv30_state_entry nv30_state_zsa = { - .validate = nv30_state_zsa_validate, - .dirty = { - .pipe = NV30_NEW_ZSA, - .hw = NV30_STATE_ZSA - } -}; - -static boolean -nv30_state_sr_validate(struct nv30_context *nv30) -{ - struct nouveau_stateobj *so = so_new(2, 2, 0); - struct pipe_stencil_ref *sr = &nv30->stencil_ref; - - so_method(so, nv30->screen->rankine, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nv30->screen->rankine, NV34TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nv30->state.hw[NV30_STATE_SR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_sr = { - .validate = nv30_state_sr_validate, - .dirty = { - .pipe = NV30_NEW_SR, - .hw = NV30_STATE_SR - } -}; diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c deleted file mode 100644 index 3aeda51ea1..0000000000 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ /dev/null @@ -1,178 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "nouveau/nouveau_winsys.h" -#include "nv30_context.h" -#include "nv30_screen.h" -#include "nv30_state.h" - -struct nv30_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct nv30_miptree *mt = (struct nv30_miptree *)pt; - struct nv30_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv30_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nv30_screen *nvscreen = nv30_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, x, y, - w, h); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; -} - -static void -nv30_transfer_del(struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv30_screen *nvscreen = nv30_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, - tx->surface, 0, 0, - tx->base.width, tx->base.height); - - pipe_surface_reference(&dst, NULL); - } - - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); -} - -static void * -nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); - - if(!tx->direct) - return map + ns->base.offset; - else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); -} - -static void -nv30_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; - - pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen) -{ - pscreen->get_tex_transfer = nv30_transfer_new; - pscreen->tex_transfer_destroy = nv30_transfer_del; - pscreen->transfer_map = nv30_transfer_map; - pscreen->transfer_unmap = nv30_transfer_unmap; -} diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c deleted file mode 100644 index 809be3712d..0000000000 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ /dev/null @@ -1,842 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) - -struct nv30_vpc { - struct nv30_vertex_program *vp; - - struct nv30_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nv30_sreg *imm; - unsigned nr_imm; -}; - -static struct nv30_sreg -temp(struct nv30_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); -} - -static struct nv30_sreg -constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nv30_vertex_program *vp = vpc->vp; - struct nv30_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nv30_sr(NV30SR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nv30_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) -{ - struct nv30_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_TEMP: - sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); - break; - case NV30SR_INPUT: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); - break; - case NV30SR_CONST: - sr |= (NV30_VP_SRC_REG_TYPE_CONST << - NV30_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NV30SR_NONE: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); - -/* - * |VVV| - * d�.�b - * \u/ - * - */ - - switch (pos) { - case 0: - hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> - NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << - NV30_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> - NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << - NV30_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) -{ - struct nv30_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NV30SR_TEMP: - hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); - break; - case NV30SR_OUTPUT: - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - - /*XXX: no way this is entirely correct, someone needs to - * figure out what exactly it is. - */ - hw[3] |= 0x800; - break; - default: - assert(0); - } -} - -static void -nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, - struct nv30_sreg s2) -{ - struct nv30_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); - - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); -// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; -// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - - if (dst.type == NV30SR_OUTPUT) { - if (slot) - hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); - } else { - if (slot) - hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nv30_sreg -tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv30_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->Register.Index) - vpc->high_temp = fsrc->Register.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nv30_sreg -tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv30_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = nv30_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->Register.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nv30_sreg src[3], dst, tmp; - struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV30_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_vertprog_prepare(struct nv30_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -static void -nv30_vertprog_translate(struct nv30_context *nv30, - struct nv30_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv30_vpc *vpc = NULL; - - tgsi_dump(vp->pipe.tokens,0); - - vpc = CALLOC(1, sizeof(struct nv30_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv30_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv30_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv30_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - FREE(vpc); -} - -static boolean -nv30_vertprog_validate(struct nv30_context *nv30) -{ - struct pipe_screen *pscreen = nv30->pipe.screen; - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - struct nv30_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - vp = nv30->vertprog; - constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv30_vertprog_translate(nv30, vp); - if (!vp->translated) - return FALSE; - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nv30->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nv30_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->exec); - } - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(1, 1, 0); - so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_ref(so, &vp->so); - so_ref(NULL, &so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv30->screen->vp_data_heap; - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nv30_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->data); - } - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, - &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv30_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv30_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV30_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nv30_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } - - if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i, vp->insns[i].data[0], vp->insns[i].data[1], - vp->insns[i].data[2], vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); - } - } - - if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) { - so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) -{ - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nouveau_resource_free(&vp->exec); - vp->exec_start = 0; - nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = 0; - so_ref(NULL, &vp->so); -} - -struct nv30_state_entry nv30_state_vertprog = { - .validate = nv30_vertprog_validate, - .dirty = { - .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, - .hw = NV30_STATE_VERTPROG, - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile deleted file mode 100644 index 0ecae2b491..0000000000 --- a/src/gallium/drivers/nv40/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv40 - -C_SOURCES = \ - nv40_clear.c \ - nv40_context.c \ - nv40_draw.c \ - nv40_fragprog.c \ - nv40_fragtex.c \ - nv40_miptree.c \ - nv40_query.c \ - nv40_screen.c \ - nv40_state.c \ - nv40_state_blend.c \ - nv40_state_emit.c \ - nv40_state_fb.c \ - nv40_state_rasterizer.c \ - nv40_state_scissor.c \ - nv40_state_stipple.c \ - nv40_state_viewport.c \ - nv40_state_zsa.c \ - nv40_surface.c \ - nv40_transfer.c \ - nv40_vbo.c \ - nv40_vertprog.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c deleted file mode 100644 index 65dc73e88b..0000000000 --- a/src/gallium/drivers/nv40/nv40_context.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" - -#include "nv40_context.h" -#include "nv40_screen.h" - -static void -nv40_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, curie, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, curie, 0x1fd8, 1); - OUT_RING (chan, 1); - } - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv40_destroy(struct pipe_context *pipe) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned i; - - for (i = 0; i < NV40_STATE_MAX; i++) { - if (nv40->state.hw[i]) - so_ref(NULL, &nv40->state.hw[i]); - } - - if (nv40->draw) - draw_destroy(nv40->draw); - FREE(nv40); -} - -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv) -{ - struct nv40_screen *screen = nv40_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nv40_context *nv40; - struct nouveau_winsys *nvws = screen->nvws; - - nv40 = CALLOC(1, sizeof(struct nv40_context)); - if (!nv40) - return NULL; - nv40->screen = screen; - - nv40->nvws = nvws; - - nv40->pipe.winsys = ws; - nv40->pipe.priv = priv; - nv40->pipe.screen = pscreen; - nv40->pipe.destroy = nv40_destroy; - nv40->pipe.draw_arrays = nv40_draw_arrays; - nv40->pipe.draw_elements = nv40_draw_elements; - nv40->pipe.clear = nv40_clear; - nv40->pipe.flush = nv40_flush; - - nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - screen->base.channel->user_private = nv40; - screen->base.channel->flush_notify = nv40_state_flush_notify; - - nv40_init_query_functions(nv40); - nv40_init_surface_functions(nv40); - nv40_init_state_functions(nv40); - - /* Create, configure, and install fallback swtnl path */ - nv40->draw = draw_create(); - draw_wide_point_threshold(nv40->draw, 9999999.0); - draw_wide_line_threshold(nv40->draw, 9999999.0); - draw_enable_line_stipple(nv40->draw, FALSE); - draw_enable_point_sprites(nv40->draw, FALSE); - draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); - - return &nv40->pipe; -} diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h deleted file mode 100644 index 2550ec654b..0000000000 --- a/src/gallium/drivers/nv40/nv40_context.h +++ /dev/null @@ -1,244 +0,0 @@ -#ifndef __NV40_CONTEXT_H__ -#define __NV40_CONTEXT_H__ - -#include <stdio.h> - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_inlines.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" -#include "nouveau/nouveau_stateobj.h" - -#include "nv40_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -enum nv40_state_index { - NV40_STATE_FB = 0, - NV40_STATE_VIEWPORT = 1, - NV40_STATE_BLEND = 2, - NV40_STATE_RAST = 3, - NV40_STATE_ZSA = 4, - NV40_STATE_BCOL = 5, - NV40_STATE_CLIP = 6, - NV40_STATE_SCISSOR = 7, - NV40_STATE_STIPPLE = 8, - NV40_STATE_FRAGPROG = 9, - NV40_STATE_VERTPROG = 10, - NV40_STATE_FRAGTEX0 = 11, - NV40_STATE_FRAGTEX1 = 12, - NV40_STATE_FRAGTEX2 = 13, - NV40_STATE_FRAGTEX3 = 14, - NV40_STATE_FRAGTEX4 = 15, - NV40_STATE_FRAGTEX5 = 16, - NV40_STATE_FRAGTEX6 = 17, - NV40_STATE_FRAGTEX7 = 18, - NV40_STATE_FRAGTEX8 = 19, - NV40_STATE_FRAGTEX9 = 20, - NV40_STATE_FRAGTEX10 = 21, - NV40_STATE_FRAGTEX11 = 22, - NV40_STATE_FRAGTEX12 = 23, - NV40_STATE_FRAGTEX13 = 24, - NV40_STATE_FRAGTEX14 = 25, - NV40_STATE_FRAGTEX15 = 26, - NV40_STATE_VERTTEX0 = 27, - NV40_STATE_VERTTEX1 = 28, - NV40_STATE_VERTTEX2 = 29, - NV40_STATE_VERTTEX3 = 30, - NV40_STATE_VTXBUF = 31, - NV40_STATE_VTXFMT = 32, - NV40_STATE_VTXATTR = 33, - NV40_STATE_SR = 34, - NV40_STATE_MAX = 35 -}; - -#include "nv40_screen.h" - -#define NV40_NEW_BLEND (1 << 0) -#define NV40_NEW_RAST (1 << 1) -#define NV40_NEW_ZSA (1 << 2) -#define NV40_NEW_SAMPLER (1 << 3) -#define NV40_NEW_FB (1 << 4) -#define NV40_NEW_STIPPLE (1 << 5) -#define NV40_NEW_SCISSOR (1 << 6) -#define NV40_NEW_VIEWPORT (1 << 7) -#define NV40_NEW_BCOL (1 << 8) -#define NV40_NEW_VERTPROG (1 << 9) -#define NV40_NEW_FRAGPROG (1 << 10) -#define NV40_NEW_ARRAYS (1 << 11) -#define NV40_NEW_UCP (1 << 12) -#define NV40_NEW_SR (1 << 13) - -struct nv40_rasterizer_state { - struct pipe_rasterizer_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv40_zsa_state { - struct pipe_depth_stencil_alpha_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv40_blend_state { - struct pipe_blend_state pipe; - struct nouveau_stateobj *so; -}; - - -struct nv40_state { - unsigned scissor_enabled; - unsigned stipple_enabled; - unsigned fp_samplers; - - uint64_t dirty; - struct nouveau_stateobj *hw[NV40_STATE_MAX]; -}; - - -struct nv40_vtxelt_state { - struct pipe_vertex_element pipe[16]; - unsigned num_elements; -}; - -struct nv40_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv40_screen *screen; - - struct draw_context *draw; - - /* HW state derived from pipe states */ - struct nv40_state state; - struct { - struct nv40_vertex_program *vertprog; - - unsigned nr_attribs; - unsigned hw[PIPE_MAX_SHADER_INPUTS]; - unsigned draw[PIPE_MAX_SHADER_INPUTS]; - unsigned emit[PIPE_MAX_SHADER_INPUTS]; - } swtnl; - - enum { - HW, SWTNL, SWRAST - } render_mode; - unsigned fallback_swtnl; - unsigned fallback_swrast; - - /* Context state */ - unsigned dirty, draw_dirty; - struct pipe_scissor_state scissor; - unsigned stipple[32]; - struct pipe_clip_state clip; - struct nv40_vertex_program *vertprog; - struct nv40_fragment_program *fragprog; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - struct nv40_rasterizer_state *rasterizer; - struct nv40_zsa_state *zsa; - struct nv40_blend_state *blend; - struct pipe_blend_color blend_colour; - struct pipe_stencil_ref stencil_ref; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_buffer *idxbuf; - unsigned idxbuf_format; - struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned nr_samplers; - unsigned nr_textures; - unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - unsigned vtxbuf_nr; - struct nv40_vtxelt_state *vtxelt; -}; - -static INLINE struct nv40_context * -nv40_context(struct pipe_context *pipe) -{ - return (struct nv40_context *)pipe; -} - -struct nv40_state_entry { - boolean (*validate)(struct nv40_context *nv40); - struct { - unsigned pipe; - unsigned hw; - } dirty; -}; - -extern void nv40_init_state_functions(struct nv40_context *nv40); -extern void nv40_init_surface_functions(struct nv40_context *nv40); -extern void nv40_init_query_functions(struct nv40_context *nv40); - -extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); - -/* nv40_draw.c */ -extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); -extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, - unsigned ib_size, unsigned mode, - unsigned start, unsigned count); - -/* nv40_vertprog.c */ -extern void nv40_vertprog_destroy(struct nv40_context *, - struct nv40_vertex_program *); - -/* nv40_fragprog.c */ -extern void nv40_fragprog_destroy(struct nv40_context *, - struct nv40_fragment_program *); - -/* nv40_fragtex.c */ -extern void nv40_fragtex_bind(struct nv40_context *); - -/* nv40_state.c and friends */ -extern boolean nv40_state_validate(struct nv40_context *nv40); -extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); -extern void nv40_state_emit(struct nv40_context *nv40); -extern void nv40_state_flush_notify(struct nouveau_channel *chan); -extern struct nv40_state_entry nv40_state_rasterizer; -extern struct nv40_state_entry nv40_state_scissor; -extern struct nv40_state_entry nv40_state_stipple; -extern struct nv40_state_entry nv40_state_fragprog; -extern struct nv40_state_entry nv40_state_vertprog; -extern struct nv40_state_entry nv40_state_blend; -extern struct nv40_state_entry nv40_state_blend_colour; -extern struct nv40_state_entry nv40_state_zsa; -extern struct nv40_state_entry nv40_state_viewport; -extern struct nv40_state_entry nv40_state_framebuffer; -extern struct nv40_state_entry nv40_state_fragtex; -extern struct nv40_state_entry nv40_state_vbo; -extern struct nv40_state_entry nv40_state_vtxfmt; -extern struct nv40_state_entry nv40_state_sr; - -/* nv40_vbo.c */ -extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv40_clear.c */ -extern void nv40_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - -/* nv40_context.c */ -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv); - -#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c deleted file mode 100644 index 48bd84d16c..0000000000 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ /dev/null @@ -1,360 +0,0 @@ -#include "pipe/p_shader_tokens.h" -#include "util/u_inlines.h" - -#include "util/u_pack_color.h" - -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pipe.h" - -#include "nv40_context.h" -#define NV40_SHADER_NO_FUCKEDNESS -#include "nv40_shader.h" - -/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very - * often at all. Uses "quadro style" vertex submission + a fixed vertex - * layout to avoid the need to generate a vertex program or vtxfmt. - */ - -struct nv40_render_stage { - struct draw_stage stage; - struct nv40_context *nv40; - unsigned prim; -}; - -static INLINE struct nv40_render_stage * -nv40_render_stage(struct draw_stage *stage) -{ - return (struct nv40_render_stage *)stage; -} - -static INLINE void -nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned i; - - for (i = 0; i < nv40->swtnl.nr_attribs; i++) { - unsigned idx = nv40->swtnl.draw[i]; - unsigned hw = nv40->swtnl.hw[i]; - - switch (nv40->swtnl.emit[i]) { - case EMIT_OMIT: - break; - case EMIT_1F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (chan, fui(v->data[idx][0])); - break; - case EMIT_2F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - break; - case EMIT_3F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - break; - case EMIT_4F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - OUT_RING (chan, fui(v->data[idx][3])); - break; - case EMIT_4UB: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][3]))); - break; - default: - assert(0); - break; - } - } -} - -static INLINE void -nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, - unsigned mode, unsigned count) -{ - struct nv40_render_stage *rs = nv40_render_stage(stage); - struct nv40_context *nv40 = rs->nv40; - - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned i; - - /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - if (rs->prim != NV40TCL_BEGIN_END_STOP) { - NOUVEAU_ERR("AIII, missed flush\n"); - assert(0); - } - FIRE_RING(chan); - nv40_state_emit(nv40); - } - - /* Switch primitive modes if necessary */ - if (rs->prim != mode) { - if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, NV40TCL_BEGIN_END_STOP); - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, mode); - rs->prim = mode; - } - - /* Emit vertex data */ - for (i = 0; i < count; i++) - nv40_render_vertex(nv40, prim->v[i]); - - /* If it's likely we'll need to empty the push buffer soon, finish - * off the primitive now. - */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, NV40TCL_BEGIN_END_STOP); - rs->prim = NV40TCL_BEGIN_END_STOP; - } -} - -static void -nv40_render_point(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); -} - -static void -nv40_render_line(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); -} - -static void -nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); -} - -static void -nv40_render_flush(struct draw_stage *draw, unsigned flags) -{ - struct nv40_render_stage *rs = nv40_render_stage(draw); - struct nv40_context *nv40 = rs->nv40; - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, NV40TCL_BEGIN_END_STOP); - rs->prim = NV40TCL_BEGIN_END_STOP; - } -} - -static void -nv40_render_reset_stipple_counter(struct draw_stage *draw) -{ -} - -static void -nv40_render_destroy(struct draw_stage *draw) -{ - FREE(draw); -} - -static INLINE void -emit_mov(struct nv40_vertex_program *vp, - unsigned dst, unsigned src, unsigned vor, unsigned mask) -{ - struct nv40_vertex_program_exec *inst; - - vp->insns = realloc(vp->insns, - sizeof(struct nv40_vertex_program_exec) * - ++vp->nr_insns); - inst = &vp->insns[vp->nr_insns - 1]; - - inst->data[0] = 0x401f9c6c; - inst->data[1] = 0x0040000d | (src << 8); - inst->data[2] = 0x8106c083; - inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); - inst->const_index = -1; - inst->has_branch_offset = FALSE; - - vp->ir |= (1 << src); - if (vor != ~0) - vp->or |= (1 << vor); -} - -static struct nv40_vertex_program * -create_drawvp(struct nv40_context *nv40) -{ - struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); - unsigned i; - - emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); - for (i = 0; i < 8; i++) - emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); - - vp->insns[vp->nr_insns - 1].data[3] |= 1; - vp->translated = TRUE; - return vp; -} - -struct draw_stage * -nv40_draw_render_stage(struct nv40_context *nv40) -{ - struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); - - if (!nv40->swtnl.vertprog) - nv40->swtnl.vertprog = create_drawvp(nv40); - - render->nv40 = nv40; - render->stage.draw = nv40->draw; - render->stage.point = nv40_render_point; - render->stage.line = nv40_render_line; - render->stage.tri = nv40_render_tri; - render->stage.flush = nv40_render_flush; - render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; - render->stage.destroy = nv40_render_destroy; - - return &render->stage; -} - -void -nv40_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, unsigned idxbuf_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - unsigned i; - void *map; - - if (!nv40_state_validate_swtnl(nv40)) - return; - nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); - nv40_state_emit(nv40); - - for (i = 0; i < nv40->vtxbuf_nr; i++) { - map = pipe_buffer_map(pscreen, nv40->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(nv40->draw, i, map); - } - - if (idxbuf) { - map = pipe_buffer_map(pscreen, idxbuf, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); - } else { - draw_set_mapped_element_buffer(nv40->draw, 0, NULL); - } - - if (nv40->constbuf[PIPE_SHADER_VERTEX]) { - const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; - - map = pipe_buffer_map(pscreen, - nv40->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, 0, - map, nr); - } - - draw_arrays(nv40->draw, mode, start, count); - - for (i = 0; i < nv40->vtxbuf_nr; i++) - pipe_buffer_unmap(pscreen, nv40->vtxbuf[i].buffer); - - if (idxbuf) - pipe_buffer_unmap(pscreen, idxbuf); - - if (nv40->constbuf[PIPE_SHADER_VERTEX]) - pipe_buffer_unmap(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX]); - - draw_flush(nv40->draw); - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, - unsigned semantic, unsigned index) -{ - unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); - unsigned a = nv40->swtnl.nr_attribs++; - - nv40->swtnl.hw[a] = hw; - nv40->swtnl.emit[a] = emit; - nv40->swtnl.draw[a] = draw_out; -} - -static boolean -nv40_state_vtxfmt_validate(struct nv40_context *nv40) -{ - struct nv40_fragment_program *fp = nv40->fragprog; - unsigned colour = 0, texcoords = 0, fog = 0, i; - - /* Determine needed fragprog inputs */ - for (i = 0; i < fp->info.num_inputs; i++) { - switch (fp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - colour |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_GENERIC: - texcoords |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_FOG: - fog = 1; - break; - default: - assert(0); - } - } - - nv40->swtnl.nr_attribs = 0; - - /* Map draw vtxprog output to hw attribute IDs */ - for (i = 0; i < 2; i++) { - if (!(colour & (1 << i))) - continue; - emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); - } - - for (i = 0; i < 8; i++) { - if (!(texcoords & (1 << i))) - continue; - emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); - } - - if (fog) { - emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); - } - - emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); - - return FALSE; -} - -struct nv40_state_entry nv40_state_vtxfmt = { - .validate = nv40_state_vtxfmt_validate, - .dirty = { - .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, - .hw = 0 - } -}; - diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c deleted file mode 100644 index 8ed4a67dd0..0000000000 --- a/src/gallium/drivers/nv40/nv40_query.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv40_context.h" - -struct nv40_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv40_query * -nv40_query(struct pipe_query *pipe) -{ - return (struct nv40_query *)pipe; -} - -static struct pipe_query * -nv40_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv40_query *q; - - q = CALLOC(1, sizeof(struct nv40_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_query *q = nv40_query(pq); - - if (q->object) - nouveau_resource_free(&q->object); - FREE(q); -} - -static void -nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - /* Happens when end_query() is called, then another begin_query() - * without querying the result in-between. For now we'll wait for - * the existing query to notify completion, but it could be better. - */ - if (q->object) { - uint64_t tmp; - pipe->get_query_result(pipe, pq, 1, &tmp); - } - - if (nouveau_resource_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) - assert(0); - nouveau_notifier_reset(nv40->screen->query, q->object->start); - - BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); - - q->ready = FALSE; -} - -static void -nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(chan); -} - -static boolean -nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nouveau_notifier_status(nv40->screen->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - nouveau_notifier_wait_status(nv40->screen->query, - q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, - 0); - } - - q->result = nouveau_notifier_return_val(nv40->screen->query, - q->object->start); - q->ready = TRUE; - nouveau_resource_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv40_init_query_functions(struct nv40_context *nv40) -{ - nv40->pipe.create_query = nv40_query_create; - nv40->pipe.destroy_query = nv40_query_destroy; - nv40->pipe.begin_query = nv40_query_begin; - nv40->pipe.end_query = nv40_query_end; - nv40->pipe.get_query_result = nv40_query_result; -} diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c deleted file mode 100644 index b216c5e38c..0000000000 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ /dev/null @@ -1,320 +0,0 @@ -#include "pipe/p_screen.h" - -#include "nv40_context.h" -#include "nv40_screen.h" - -#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf -#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 -#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 - -static int -nv40_screen_get_param(struct pipe_screen *pscreen, int param) -{ - struct nv40_screen *screen = nv40_screen(pscreen); - - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; /* We have 4 - but unsupported currently */ - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - case NOUVEAU_CAP_HW_VTXBUF: - return 1; - case NOUVEAU_CAP_HW_IDXBUF: - if (screen->curie->grclass == NV40TCL) - return 1; - return 0; - case PIPE_CAP_INDEP_BLEND_ENABLE: - return 0; - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv40_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static struct pipe_buffer * -nv40_surface_buffer(struct pipe_surface *surf) -{ - struct nv40_miptree *mt = (struct nv40_miptree *)surf->texture; - - return mt->buffer; -} - -static void -nv40_screen_destroy(struct pipe_screen *pscreen) -{ - struct nv40_screen *screen = nv40_screen(pscreen); - unsigned i; - - for (i = 0; i < NV40_STATE_MAX; i++) { - if (screen->state[i]) - so_ref(NULL, &screen->state[i]); - } - - nouveau_resource_destroy(&screen->vp_exec_heap); - nouveau_resource_destroy(&screen->vp_data_heap); - nouveau_resource_destroy(&screen->query_heap); - nouveau_notifier_free(&screen->query); - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->curie); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - struct nouveau_stateobj *so; - unsigned curie_class = 0; - int ret; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv40_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv40_screen_destroy; - pscreen->get_param = nv40_screen_get_param; - pscreen->get_paramf = nv40_screen_get_paramf; - pscreen->is_format_supported = nv40_screen_surface_format_supported; - pscreen->context_create = nv40_create; - - nv40_screen_init_miptree_functions(pscreen); - nv40_screen_init_transfer_functions(pscreen); - - /* 3D object */ - switch (dev->chipset & 0xf0) { - case 0x40: - if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) - curie_class = NV40TCL; - else - if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - curie_class = NV44TCL; - break; - case 0x60: - if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - curie_class = NV44TCL; - break; - } - - if (!curie_class) { - NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef3097, curie_class, &screen->curie); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv40_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Query objects */ - ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - nouveau_resource_init(&screen->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, 512) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Static curie initialisation */ - so = so_new(16, 25, 0); - so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); - so_data (so, screen->sync->handle); - so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle); - so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - - so_method(so, screen->curie, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); - - /* vtxprog output routing */ - so_method(so, screen->curie, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, screen->curie, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, screen->curie, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, screen->curie, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, screen->curie, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, screen->curie, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, screen->curie, 0x1e94, 1); - so_data (so, 0x00000001); - - so_emit(chan, so); - so_ref(NULL, &so); - nouveau_pushbuf_flush(chan, 0); - - return pscreen; -} - diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h deleted file mode 100644 index 854dccf548..0000000000 --- a/src/gallium/drivers/nv40/nv40_shader.h +++ /dev/null @@ -1,556 +0,0 @@ -#ifndef __NV40_SHADER_H__ -#define __NV40_SHADER_H__ - -/* Vertex programs instruction set - * - * The NV40 instruction set is very similar to NV30. Most fields are in - * a slightly different position in the instruction however. - * - * Merged instructions - * In some cases it is possible to put two instructions into one opcode - * slot. The rules for when this is OK is not entirely clear to me yet. - * - * There are separate writemasks and dest temp register fields for each - * grouping of instructions. There is however only one field with the - * ID of a result register. Writing to temp/result regs is selected by - * setting VEC_RESULT/SCA_RESULT. - * - * Temporary registers - * The source/dest temp register fields have been extended by 1 bit, to - * give a total of 32 temporary registers. - * - * Relative Addressing - * NV40 can use an address register to index into vertex attribute regs. - * This is done by putting the offset value into INPUT_SRC and setting - * the INDEX_INPUT flag. - * - * Conditional execution (see NV_vertex_program{2,3} for details) - * There is a second condition code register on NV40, it's use is enabled - * by setting the COND_REG_SELECT_1 flag. - * - * Texture lookup - * TODO - */ - -/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ -#define NV40_VP_INST_VEC_RESULT (1 << 30) -/* uncertain.. */ -#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) -/* use address reg as index into attribs */ -#define NV40_VP_INST_INDEX_INPUT (1 << 27) -#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) -#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV40_VP_INST_SRC2_ABS (1 << 23) -#define NV40_VP_INST_SRC1_ABS (1 << 22) -#define NV40_VP_INST_SRC0_ABS (1 << 21) -#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 -#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) -#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) -#define NV40_VP_INST_COND_SHIFT 10 -#define NV40_VP_INST_COND_MASK (0x7 << 10) -# define NV40_VP_INST_COND_FL 0 -# define NV40_VP_INST_COND_LT 1 -# define NV40_VP_INST_COND_EQ 2 -# define NV40_VP_INST_COND_LE 3 -# define NV40_VP_INST_COND_GT 4 -# define NV40_VP_INST_COND_NE 5 -# define NV40_VP_INST_COND_GE 6 -# define NV40_VP_INST_COND_TR 7 -#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 -#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) -#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 -#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) -#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 -#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) -#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) -#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) -#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 -#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) -#define NV40_VP_INST0_KNOWN ( \ - NV40_VP_INST_INDEX_INPUT | \ - NV40_VP_INST_COND_REG_SELECT_1 | \ - NV40_VP_INST_ADDR_REG_SELECT_1 | \ - NV40_VP_INST_SRC2_ABS | \ - NV40_VP_INST_SRC1_ABS | \ - NV40_VP_INST_SRC0_ABS | \ - NV40_VP_INST_VEC_DEST_TEMP_MASK | \ - NV40_VP_INST_COND_TEST_ENABLE | \ - NV40_VP_INST_COND_MASK | \ - NV40_VP_INST_COND_SWZ_ALL_MASK | \ - NV40_VP_INST_ADDR_SWZ_MASK) - -/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ -#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 -#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_MUL 0x02 -# define NV40_VP_INST_OP_ADD 0x03 -# define NV40_VP_INST_OP_MAD 0x04 -# define NV40_VP_INST_OP_DP3 0x05 -# define NV40_VP_INST_OP_DPH 0x06 -# define NV40_VP_INST_OP_DP4 0x07 -# define NV40_VP_INST_OP_DST 0x08 -# define NV40_VP_INST_OP_MIN 0x09 -# define NV40_VP_INST_OP_MAX 0x0A -# define NV40_VP_INST_OP_SLT 0x0B -# define NV40_VP_INST_OP_SGE 0x0C -# define NV40_VP_INST_OP_ARL 0x0D -# define NV40_VP_INST_OP_FRC 0x0E -# define NV40_VP_INST_OP_FLR 0x0F -# define NV40_VP_INST_OP_SEQ 0x10 -# define NV40_VP_INST_OP_SFL 0x11 -# define NV40_VP_INST_OP_SGT 0x12 -# define NV40_VP_INST_OP_SLE 0x13 -# define NV40_VP_INST_OP_SNE 0x14 -# define NV40_VP_INST_OP_STR 0x15 -# define NV40_VP_INST_OP_SSG 0x16 -# define NV40_VP_INST_OP_ARR 0x17 -# define NV40_VP_INST_OP_ARA 0x18 -# define NV40_VP_INST_OP_TXL 0x19 -#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 -#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_RCP 0x02 -# define NV40_VP_INST_OP_RCC 0x03 -# define NV40_VP_INST_OP_RSQ 0x04 -# define NV40_VP_INST_OP_EXP 0x05 -# define NV40_VP_INST_OP_LOG 0x06 -# define NV40_VP_INST_OP_LIT 0x07 -# define NV40_VP_INST_OP_BRA 0x09 -# define NV40_VP_INST_OP_CAL 0x0B -# define NV40_VP_INST_OP_RET 0x0C -# define NV40_VP_INST_OP_LG2 0x0D -# define NV40_VP_INST_OP_EX2 0x0E -# define NV40_VP_INST_OP_SIN 0x0F -# define NV40_VP_INST_OP_COS 0x10 -# define NV40_VP_INST_OP_PUSHA 0x13 -# define NV40_VP_INST_OP_POPA 0x14 -#define NV40_VP_INST_CONST_SRC_SHIFT 12 -#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) -#define NV40_VP_INST_INPUT_SRC_SHIFT 8 -#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) -# define NV40_VP_INST_IN_POS 0 -# define NV40_VP_INST_IN_WEIGHT 1 -# define NV40_VP_INST_IN_NORMAL 2 -# define NV40_VP_INST_IN_COL0 3 -# define NV40_VP_INST_IN_COL1 4 -# define NV40_VP_INST_IN_FOGC 5 -# define NV40_VP_INST_IN_TC0 8 -# define NV40_VP_INST_IN_TC(n) (8+n) -#define NV40_VP_INST_SRC0H_SHIFT 0 -#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) -#define NV40_VP_INST1_KNOWN ( \ - NV40_VP_INST_VEC_OPCODE_MASK | \ - NV40_VP_INST_SCA_OPCODE_MASK | \ - NV40_VP_INST_CONST_SRC_MASK | \ - NV40_VP_INST_INPUT_SRC_MASK | \ - NV40_VP_INST_SRC0H_MASK \ - ) - -/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ -#define NV40_VP_INST_SRC0L_SHIFT 23 -#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) -#define NV40_VP_INST_SRC1_SHIFT 6 -#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) -#define NV40_VP_INST_SRC2H_SHIFT 0 -#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) -#define NV40_VP_INST_IADDRH_SHIFT 0 -#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) - -/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ -#define NV40_VP_INST_IADDRL_SHIFT 29 -#define NV40_VP_INST_IADDRL_MASK (7 << 29) -#define NV40_VP_INST_SRC2L_SHIFT 21 -#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) -#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 -#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) -# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) -# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) -# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) -# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) -#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 -#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) -# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) -# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) -# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) -# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) -#define NV40_VP_INST_SCA_RESULT (1 << 12) -#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 -#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) -#define NV40_VP_INST_DEST_SHIFT 2 -#define NV40_VP_INST_DEST_MASK (31 << 2) -# define NV40_VP_INST_DEST_POS 0 -# define NV40_VP_INST_DEST_COL0 1 -# define NV40_VP_INST_DEST_COL1 2 -# define NV40_VP_INST_DEST_BFC0 3 -# define NV40_VP_INST_DEST_BFC1 4 -# define NV40_VP_INST_DEST_FOGC 5 -# define NV40_VP_INST_DEST_PSZ 6 -# define NV40_VP_INST_DEST_TC0 7 -# define NV40_VP_INST_DEST_TC(n) (7+n) -# define NV40_VP_INST_DEST_TEMP 0x1F -#define NV40_VP_INST_INDEX_CONST (1 << 1) -#define NV40_VP_INST_LAST (1 << 0) -#define NV40_VP_INST3_KNOWN ( \ - NV40_VP_INST_SRC2L_MASK |\ - NV40_VP_INST_SCA_WRITEMASK_MASK |\ - NV40_VP_INST_VEC_WRITEMASK_MASK |\ - NV40_VP_INST_SCA_DEST_TEMP_MASK |\ - NV40_VP_INST_DEST_MASK |\ - NV40_VP_INST_INDEX_CONST) - -/* Useful to split the source selection regs into their pieces */ -#define NV40_VP_SRC0_HIGH_SHIFT 9 -#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 -#define NV40_VP_SRC0_LOW_MASK 0x000001FF -#define NV40_VP_SRC2_HIGH_SHIFT 11 -#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 -#define NV40_VP_SRC2_LOW_MASK 0x000007FF - -/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ -#define NV40_VP_SRC_NEGATE (1 << 16) -#define NV40_VP_SRC_SWZ_X_SHIFT 14 -#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) -#define NV40_VP_SRC_SWZ_Y_SHIFT 12 -#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) -#define NV40_VP_SRC_SWZ_Z_SHIFT 10 -#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) -#define NV40_VP_SRC_SWZ_W_SHIFT 8 -#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) -#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 -#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) -#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) -#define NV40_VP_SRC_REG_TYPE_SHIFT 0 -#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) -# define NV40_VP_SRC_REG_TYPE_UNK0 0 -# define NV40_VP_SRC_REG_TYPE_TEMP 1 -# define NV40_VP_SRC_REG_TYPE_INPUT 2 -# define NV40_VP_SRC_REG_TYPE_CONST 3 - - -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, - * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and - * SWIZZLE_ONE. - * - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as - * SWIZZLE_ZERO is implemented simply by not writing to the relevant components - * of the destination. - * - * Looping - * Loops appear to be fairly expensive on NV40 at least, the proprietary - * driver goes to a lot of effort to avoid using the native looping - * instructions. If the total number of *executed* instructions between - * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. - * The maximum loop count is 255. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - * DP2 - MUL + ADD - * NRM - */ - -//== Opcode / Destination selection == -#define NV40_FP_OP_PROGRAM_END (1 << 0) -#define NV40_FP_OP_OUT_REG_SHIFT 1 -#define NV40_FP_OP_OUT_REG_MASK (63 << 1) -/* Needs to be set when writing outputs to get expected result.. */ -#define NV40_FP_OP_OUT_REG_HALF (1 << 7) -#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV40_FP_OP_OUTMASK_SHIFT 9 -#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV40_FP_OP_OUT_X (1 << 9) -# define NV40_FP_OP_OUT_Y (1 <<10) -# define NV40_FP_OP_OUT_Z (1 <<11) -# define NV40_FP_OP_OUT_W (1 <<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV40_FP_OP_INPUT_SRC_SHIFT 13 -#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV40_FP_OP_INPUT_SRC_COL0 0x1 -# define NV40_FP_OP_INPUT_SRC_COL1 0x2 -# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV40_FP_OP_INPUT_SRC_TC0 0x4 -# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -# define NV40_FP_OP_INPUT_SRC_FACING 0xE -#define NV40_FP_OP_TEX_UNIT_SHIFT 17 -#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) -#define NV40_FP_OP_PRECISION_SHIFT 22 -#define NV40_FP_OP_PRECISION_MASK (3 << 22) -# define NV40_FP_PRECISION_FP32 0 -# define NV40_FP_PRECISION_FP16 1 -# define NV40_FP_PRECISION_FX12 2 -#define NV40_FP_OP_OPCODE_SHIFT 24 -#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV40_FP_OP_OPCODE_NOP 0x00 -# define NV40_FP_OP_OPCODE_MOV 0x01 -# define NV40_FP_OP_OPCODE_MUL 0x02 -# define NV40_FP_OP_OPCODE_ADD 0x03 -# define NV40_FP_OP_OPCODE_MAD 0x04 -# define NV40_FP_OP_OPCODE_DP3 0x05 -# define NV40_FP_OP_OPCODE_DP4 0x06 -# define NV40_FP_OP_OPCODE_DST 0x07 -# define NV40_FP_OP_OPCODE_MIN 0x08 -# define NV40_FP_OP_OPCODE_MAX 0x09 -# define NV40_FP_OP_OPCODE_SLT 0x0A -# define NV40_FP_OP_OPCODE_SGE 0x0B -# define NV40_FP_OP_OPCODE_SLE 0x0C -# define NV40_FP_OP_OPCODE_SGT 0x0D -# define NV40_FP_OP_OPCODE_SNE 0x0E -# define NV40_FP_OP_OPCODE_SEQ 0x0F -# define NV40_FP_OP_OPCODE_FRC 0x10 -# define NV40_FP_OP_OPCODE_FLR 0x11 -# define NV40_FP_OP_OPCODE_KIL 0x12 -# define NV40_FP_OP_OPCODE_PK4B 0x13 -# define NV40_FP_OP_OPCODE_UP4B 0x14 -/* DDX/DDY can only write to XY */ -# define NV40_FP_OP_OPCODE_DDX 0x15 -# define NV40_FP_OP_OPCODE_DDY 0x16 -# define NV40_FP_OP_OPCODE_TEX 0x17 -# define NV40_FP_OP_OPCODE_TXP 0x18 -# define NV40_FP_OP_OPCODE_TXD 0x19 -# define NV40_FP_OP_OPCODE_RCP 0x1A -# define NV40_FP_OP_OPCODE_EX2 0x1C -# define NV40_FP_OP_OPCODE_LG2 0x1D -# define NV40_FP_OP_OPCODE_STR 0x20 -# define NV40_FP_OP_OPCODE_SFL 0x21 -# define NV40_FP_OP_OPCODE_COS 0x22 -# define NV40_FP_OP_OPCODE_SIN 0x23 -# define NV40_FP_OP_OPCODE_PK2H 0x24 -# define NV40_FP_OP_OPCODE_UP2H 0x25 -# define NV40_FP_OP_OPCODE_PK4UB 0x27 -# define NV40_FP_OP_OPCODE_UP4UB 0x28 -# define NV40_FP_OP_OPCODE_PK2US 0x29 -# define NV40_FP_OP_OPCODE_UP2US 0x2A -# define NV40_FP_OP_OPCODE_DP2A 0x2E -# define NV40_FP_OP_OPCODE_TXL 0x2F -# define NV40_FP_OP_OPCODE_TXB 0x31 -# define NV40_FP_OP_OPCODE_DIV 0x3A -# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C -/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ -# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 -# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 -# define NV40_FP_OP_BRA_OPCODE_IF 0x2 -# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 -# define NV40_FP_OP_BRA_OPCODE_REP 0x4 -# define NV40_FP_OP_BRA_OPCODE_RET 0x5 -#define NV40_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV40_FP_OP_OUT_ABS (1 << 29) -#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV40_FP_OP_COND_SHIFT 18 -#define NV40_FP_OP_COND_MASK (0x07 << 18) -# define NV40_FP_OP_COND_FL 0 -# define NV40_FP_OP_COND_LT 1 -# define NV40_FP_OP_COND_EQ 2 -# define NV40_FP_OP_COND_LE 3 -# define NV40_FP_OP_COND_GT 4 -# define NV40_FP_OP_COND_NE 5 -# define NV40_FP_OP_COND_GE 6 -# define NV40_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) -#define NV40_FP_OP_DST_SCALE_SHIFT 28 -#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV40_FP_OP_DST_SCALE_1X 0 -#define NV40_FP_OP_DST_SCALE_2X 1 -#define NV40_FP_OP_DST_SCALE_4X 2 -#define NV40_FP_OP_DST_SCALE_8X 3 -#define NV40_FP_OP_DST_SCALE_INV_2X 5 -#define NV40_FP_OP_DST_SCALE_INV_4X 6 -#define NV40_FP_OP_DST_SCALE_INV_8X 7 - -/* SRC1 LOOP */ -#define NV40_FP_OP_LOOP_INCR_SHIFT 19 -#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) -#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 -#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) -#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 -#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) - -/* SRC1 IF */ -#define NV40_FP_OP_ELSE_ID_SHIFT 2 -#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) - -/* SRC1 CAL */ -#define NV40_FP_OP_IADDR_SHIFT 2 -#define NV40_FP_OP_IADDR_MASK (0xFF << 2) - -/* SRC1 REP - * I have no idea why there are 3 count values here.. but they - * have always been filled with the same value in my tests so - * far.. - */ -#define NV40_FP_OP_REP_COUNT1_SHIFT 2 -#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) -#define NV40_FP_OP_REP_COUNT2_SHIFT 10 -#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) -#define NV40_FP_OP_REP_COUNT3_SHIFT 19 -#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) - -/* SRC2 REP/IF */ -#define NV40_FP_OP_END_ID_SHIFT 2 -#define NV40_FP_OP_END_ID_MASK (0xFF << 2) - -// SRC2 high-order -#define NV40_FP_OP_INDEX_INPUT (1 << 30) -#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 -#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) - -//== Register selection == -#define NV40_FP_REG_TYPE_SHIFT 0 -#define NV40_FP_REG_TYPE_MASK (3 << 0) -# define NV40_FP_REG_TYPE_TEMP 0 -# define NV40_FP_REG_TYPE_INPUT 1 -# define NV40_FP_REG_TYPE_CONST 2 -#define NV40_FP_REG_SRC_SHIFT 2 -#define NV40_FP_REG_SRC_MASK (63 << 2) -#define NV40_FP_REG_SRC_HALF (1 << 8) -#define NV40_FP_REG_SWZ_ALL_SHIFT 9 -#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV40_FP_REG_SWZ_X_SHIFT 9 -#define NV40_FP_REG_SWZ_X_MASK (3 << 9) -#define NV40_FP_REG_SWZ_Y_SHIFT 11 -#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV40_FP_REG_SWZ_Z_SHIFT 13 -#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV40_FP_REG_SWZ_W_SHIFT 15 -#define NV40_FP_REG_SWZ_W_MASK (3 << 15) -# define NV40_FP_SWIZZLE_X 0 -# define NV40_FP_SWIZZLE_Y 1 -# define NV40_FP_SWIZZLE_Z 2 -# define NV40_FP_SWIZZLE_W 3 -#define NV40_FP_REG_NEGATE (1 << 17) - -#ifndef NV40_SHADER_NO_FUCKEDNESS -#define NV40SR_NONE 0 -#define NV40SR_OUTPUT 1 -#define NV40SR_INPUT 2 -#define NV40SR_TEMP 3 -#define NV40SR_CONST 4 - -struct nv40_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv40_sreg -nv40_sr(int type, int index) -{ - struct nv40_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv40_sreg -nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) -{ - struct nv40_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv40_sreg -nv40_sr_neg(struct nv40_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_abs(struct nv40_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_scale(struct nv40_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} -#endif - -#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c deleted file mode 100644 index ee471e6ce4..0000000000 --- a/src/gallium/drivers/nv40/nv40_state.c +++ /dev/null @@ -1,765 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -static void * -nv40_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_grobj *curie = nv40->screen->curie; - struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); - - if (cso->rt[0].blend_enable) { - so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | - nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rt[0].rgb_dst_factor)); - so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | - nvgl_blend_eqn(cso->rt[0].rgb_func)); - } else { - so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, curie, NV40TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - so_ref(so, &bso->so); - so_ref(NULL, &so); - bso->pipe = *cso; - return (void *)bso; -} - -static void -nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->blend = hwcso; - nv40->dirty |= NV40_NEW_BLEND; -} - -static void -nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_blend_state *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV40TCL_TEX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV40TCL_TEX_WRAP_S_CLAMP; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; - break; - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV40TCL_TEX_WRAP_S_REPEAT; - break; - } - - return ret >> NV40TCL_TEX_WRAP_S_SHIFT; -} - -static void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv40_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nv40_sampler_state)); - - ps->fmt = 0; - if (!cso->normalized_coords) - ps->fmt |= NV40TCL_TEX_FORMAT_RECT; - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); - - ps->en = 0; - if (cso->max_anisotropy >= 2) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - - if (cso->max_anisotropy >= 16) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } - - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - -static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nv40->tex_sampler[unit] = sampler[unit]; - nv40->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv40->nr_samplers; unit++) { - nv40->tex_sampler[unit] = NULL; - nv40->dirty_samplers |= (1 << unit); - } - - nv40->nr_samplers = nr; - nv40->dirty |= NV40_NEW_SAMPLER; -} - -static void -nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv40->tex_miptree[unit], miptree[unit]); - nv40->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv40->nr_textures; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv40->tex_miptree[unit], NULL); - nv40->dirty_samplers |= (1 << unit); - } - - nv40->nr_textures = nr; - nv40->dirty |= NV40_NEW_SAMPLER; -} - -static void * -nv40_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *curie = nv40->screen->curie; - - /*XXX: ignored: - * light_twoside - * point_smooth -nohw - * multisample - */ - - so_method(so, curie, NV40TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : - NV40TCL_SHADE_MODEL_SMOOTH); - - so_method(so, curie, NV40TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, curie, NV40TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV40TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV40TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV40TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV40TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); - else - so_data(so, 0); - if (cso->offset_cw || cso->offset_ccw) { - so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); - } - - so_method(so, curie, NV40TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - so_ref(so, &rsso->so); - so_ref(NULL, &so); - rsso->pipe = *cso; - return (void *)rsso; -} - -static void -nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->rasterizer = hwcso; - nv40->dirty |= NV40_NEW_RAST; - nv40->draw_dirty |= NV40_NEW_RAST; -} - -static void -nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_rasterizer_state *rsso = hwcso; - - so_ref(NULL, &rsso->so); - FREE(rsso); -} - -static void * -nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *curie = nv40->screen->curie; - - so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); - - if (cso->stencil[0].enabled) { - so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, curie, NV40TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, curie, NV40TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &zsaso->so); - so_ref(NULL, &so); - zsaso->pipe = *cso; - return (void *)zsaso; -} - -static void -nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->zsa = hwcso; - nv40->dirty |= NV40_NEW_ZSA; -} - -static void -nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_zsa_state *zsaso = hwcso; - - so_ref(NULL, &zsaso->so); - FREE(zsaso); -} - -static void * -nv40_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv40_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); - - return (void *)vp; -} - -static void -nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->vertprog = hwcso; - nv40->dirty |= NV40_NEW_VERTPROG; - nv40->draw_dirty |= NV40_NEW_VERTPROG; -} - -static void -nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; - - draw_delete_vertex_shader(nv40->draw, vp->draw); - nv40_vertprog_destroy(nv40, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nv40_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv40_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv40_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->fragprog = hwcso; - nv40->dirty |= NV40_NEW_FRAGPROG; -} - -static void -nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_fragment_program *fp = hwcso; - - nv40_fragprog_destroy(nv40, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - -static void -nv40_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->blend_colour = *bcol; - nv40->dirty |= NV40_NEW_BCOL; -} - - static void -nv40_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->stencil_ref = *sr; - nv40->dirty |= NV40_NEW_SR; -} - -static void -nv40_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->clip = *clip; - nv40->dirty |= NV40_NEW_UCP; - nv40->draw_dirty |= NV40_NEW_UCP; -} - -static void -nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->constbuf[shader] = buf; - nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_VERTEX) { - nv40->dirty |= NV40_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv40->dirty |= NV40_NEW_FRAGPROG; - } -} - -static void -nv40_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->framebuffer = *fb; - nv40->dirty |= NV40_NEW_FB; -} - -static void -nv40_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - memcpy(nv40->stipple, stipple->stipple, 4 * 32); - nv40->dirty |= NV40_NEW_STIPPLE; -} - -static void -nv40_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->scissor = *s; - nv40->dirty |= NV40_NEW_SCISSOR; -} - -static void -nv40_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->viewport = *vpt; - nv40->dirty |= NV40_NEW_VIEWPORT; - nv40->draw_dirty |= NV40_NEW_VIEWPORT; -} - -static void -nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); - nv40->vtxbuf_nr = count; - - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->draw_dirty |= NV40_NEW_ARRAYS; -} - -static void * -nv40_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct nv40_vtxelt_state *cso = CALLOC_STRUCT(nv40_vtxelt_state); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - -/* nv40_vtxelt_construct(cso);*/ - - return (void *)cso; -} - -static void -nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->vtxelt = hwcso; - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->draw_dirty |= NV40_NEW_ARRAYS; -} - -void -nv40_init_state_functions(struct nv40_context *nv40) -{ - nv40->pipe.create_blend_state = nv40_blend_state_create; - nv40->pipe.bind_blend_state = nv40_blend_state_bind; - nv40->pipe.delete_blend_state = nv40_blend_state_delete; - - nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; - nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture; - - nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; - nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; - nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; - - nv40->pipe.create_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_create; - nv40->pipe.bind_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_bind; - nv40->pipe.delete_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_delete; - - nv40->pipe.create_vs_state = nv40_vp_state_create; - nv40->pipe.bind_vs_state = nv40_vp_state_bind; - nv40->pipe.delete_vs_state = nv40_vp_state_delete; - - nv40->pipe.create_fs_state = nv40_fp_state_create; - nv40->pipe.bind_fs_state = nv40_fp_state_bind; - nv40->pipe.delete_fs_state = nv40_fp_state_delete; - - nv40->pipe.set_blend_color = nv40_set_blend_color; - nv40->pipe.set_stencil_ref = nv40_set_stencil_ref; - nv40->pipe.set_clip_state = nv40_set_clip_state; - nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; - nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; - nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; - nv40->pipe.set_scissor_state = nv40_set_scissor_state; - nv40->pipe.set_viewport_state = nv40_set_viewport_state; - - nv40->pipe.create_vertex_elements_state = nv40_vtxelts_state_create; - nv40->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete; - nv40->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind; - - nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; -} - diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c deleted file mode 100644 index 3ff00a37f6..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_blend_validate(struct nv40_context *nv40) -{ - so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); - return TRUE; -} - -struct nv40_state_entry nv40_state_blend = { - .validate = nv40_state_blend_validate, - .dirty = { - .pipe = NV40_NEW_BLEND, - .hw = NV40_STATE_BLEND - } -}; - -static boolean -nv40_state_blend_colour_validate(struct nv40_context *nv40) -{ - struct nouveau_stateobj *so = so_new(1, 1, 0); - struct pipe_blend_color *bcol = &nv40->blend_colour; - - so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_blend_colour = { - .validate = nv40_state_blend_colour_validate, - .dirty = { - .pipe = NV40_NEW_BCOL, - .hw = NV40_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c deleted file mode 100644 index 297d71f4fa..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ /dev/null @@ -1,189 +0,0 @@ -#include "nv40_context.h" -#include "nv40_state.h" -#include "draw/draw_context.h" - -static struct nv40_state_entry *render_states[] = { - &nv40_state_framebuffer, - &nv40_state_rasterizer, - &nv40_state_scissor, - &nv40_state_stipple, - &nv40_state_fragprog, - &nv40_state_fragtex, - &nv40_state_vertprog, - &nv40_state_blend, - &nv40_state_blend_colour, - &nv40_state_zsa, - &nv40_state_sr, - &nv40_state_viewport, - &nv40_state_vbo, - NULL -}; - -static struct nv40_state_entry *swtnl_states[] = { - &nv40_state_framebuffer, - &nv40_state_rasterizer, - &nv40_state_scissor, - &nv40_state_stipple, - &nv40_state_fragprog, - &nv40_state_fragtex, - &nv40_state_vertprog, - &nv40_state_blend, - &nv40_state_blend_colour, - &nv40_state_zsa, - &nv40_state_sr, - &nv40_state_viewport, - &nv40_state_vtxfmt, - NULL -}; - -static void -nv40_state_do_validate(struct nv40_context *nv40, - struct nv40_state_entry **states) -{ - while (*states) { - struct nv40_state_entry *e = *states; - - if (nv40->dirty & e->dirty.pipe) { - if (e->validate(nv40)) - nv40->state.dirty |= (1ULL << e->dirty.hw); - } - - states++; - } - nv40->dirty = 0; -} - -void -nv40_state_emit(struct nv40_context *nv40) -{ - struct nv40_state *state = &nv40->state; - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned i; - uint64_t states; - - /* XXX: race conditions - */ - if (nv40 != screen->cur_ctx) { - for (i = 0; i < NV40_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nv40; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nv40->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nv40->screen->state[i]); - states &= ~(1ULL << i); - } - - if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | - (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 1); - } - - state->dirty = 0; -} - -void -nv40_state_flush_notify(struct nouveau_channel *chan) -{ - struct nv40_context *nv40 = chan->user_private; - struct nv40_state *state = &nv40->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NV40_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NV40_STATE_FRAGPROG]); - if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) - so_emit_reloc_markers(chan, state->hw[NV40_STATE_VTXBUF]); -} - -boolean -nv40_state_validate(struct nv40_context *nv40) -{ - boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; - - if (nv40->render_mode != HW) { - /* Don't even bother trying to go back to hw if none - * of the states that caused swtnl previously have changed. - */ - if ((nv40->fallback_swtnl & nv40->dirty) - != nv40->fallback_swtnl) - return FALSE; - - /* Attempt to go to hwtnl again */ - nv40->pipe.flush(&nv40->pipe, 0, NULL); - nv40->dirty |= (NV40_NEW_VIEWPORT | - NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS); - nv40->render_mode = HW; - } - - nv40_state_do_validate(nv40, render_states); - if (nv40->fallback_swtnl || nv40->fallback_swrast) - return FALSE; - - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); - - return TRUE; -} - -boolean -nv40_state_validate_swtnl(struct nv40_context *nv40) -{ - struct draw_context *draw = nv40->draw; - - /* Setup for swtnl */ - if (nv40->render_mode == HW) { - NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); - nv40->pipe.flush(&nv40->pipe, 0, NULL); - nv40->dirty |= (NV40_NEW_VIEWPORT | - NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS); - nv40->render_mode = SWTNL; - } - - if (nv40->draw_dirty & NV40_NEW_VERTPROG) - draw_bind_vertex_shader(draw, nv40->vertprog->draw); - - if (nv40->draw_dirty & NV40_NEW_RAST) - draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); - - if (nv40->draw_dirty & NV40_NEW_UCP) - draw_set_clip_state(draw, &nv40->clip); - - if (nv40->draw_dirty & NV40_NEW_VIEWPORT) - draw_set_viewport_state(draw, &nv40->viewport); - - if (nv40->draw_dirty & NV40_NEW_ARRAYS) { - draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); - draw_set_vertex_elements(draw, nv40->vtxelt->num_elements, nv40->vtxelt->pipe); - } - - nv40_state_do_validate(nv40, swtnl_states); - if (nv40->fallback_swrast) { - NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); - return FALSE; - } - - nv40->draw_dirty = 0; - return TRUE; -} - diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c deleted file mode 100644 index fd3fdfddc0..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ /dev/null @@ -1,175 +0,0 @@ -#include "nv40_context.h" -#include "nouveau/nouveau_util.h" - -static struct pipe_buffer * -nv40_do_surface_buffer(struct pipe_surface *surface) -{ - struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; - return mt->buffer; -} - -#define nv40_surface_buffer(ps) nouveau_bo(nv40_do_surface_buffer(ps)) - -static boolean -nv40_state_framebuffer_validate(struct nv40_context *nv40) -{ - struct nouveau_channel *chan = nv40->screen->base.channel; - struct nouveau_grobj *curie = nv40->screen->curie; - struct pipe_framebuffer_state *fb = &nv40->framebuffer; - struct nv04_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format; - int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(18, 24, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - - rt_enable = 0; - for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { - assert(colour_format == fb->cbufs[i]->format); - } else { - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } - } - - if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | - NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV40TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - - rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | - log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | - log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; - } - else - rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_B5G6R5_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, nv40_surface_buffer(&rt[0]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch); - so_reloc (so, nv40_surface_buffer(&rt[0]->base), - rt[0]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, nv40_surface_buffer(&rt[1]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, nv40_surface_buffer(&rt[1]->base), - rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_data (so, rt[1]->pitch); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, nv40_surface_buffer(&rt[2]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&rt[2]->base), - rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_method(so, curie, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, nv40_surface_buffer(&rt[3]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&rt[3]->base), - rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_method(so, curie, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch); - } - - if (zeta_format) { - so_method(so, curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, nv40_surface_buffer(&zeta->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&zeta->base), - zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, curie, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch); - } - - so_method(so, curie, NV40TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, curie, NV40TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, curie, NV40TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, curie, 0x1d88, 1); - so_data (so, (1 << 12) | h); - - so_ref(so, &nv40->state.hw[NV40_STATE_FB]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_framebuffer = { - .validate = nv40_state_framebuffer_validate, - .dirty = { - .pipe = NV40_NEW_FB, - .hw = NV40_STATE_FB - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c deleted file mode 100644 index 9ecda5990f..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_rasterizer.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_rasterizer_validate(struct nv40_context *nv40) -{ - so_ref(nv40->rasterizer->so, - &nv40->state.hw[NV40_STATE_RAST]); - return TRUE; -} - -struct nv40_state_entry nv40_state_rasterizer = { - .validate = nv40_state_rasterizer_validate, - .dirty = { - .pipe = NV40_NEW_RAST, - .hw = NV40_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c deleted file mode 100644 index 753a505e93..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_scissor_validate(struct nv40_context *nv40) -{ - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct pipe_scissor_state *s = &nv40->scissor; - struct nouveau_stateobj *so; - - if (nv40->state.hw[NV40_STATE_SCISSOR] && - (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) - return FALSE; - nv40->state.scissor_enabled = rast->scissor; - - so = so_new(1, 2, 0); - so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); - if (nv40->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_scissor = { - .validate = nv40_state_scissor_validate, - .dirty = { - .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, - .hw = NV40_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c deleted file mode 100644 index 2b371ebfec..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_stipple_validate(struct nv40_context *nv40) -{ - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct nouveau_grobj *curie = nv40->screen->curie; - struct nouveau_stateobj *so; - - if (nv40->state.hw[NV40_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(2, 33, 0); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nv40->stipple[i]); - } else { - so = so_new(1, 1, 0); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); - return TRUE; -} - -struct nv40_state_entry nv40_state_stipple = { - .validate = nv40_state_stipple_validate, - .dirty = { - .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, - .hw = NV40_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c deleted file mode 100644 index 3aacb00f99..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ /dev/null @@ -1,38 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_viewport_validate(struct nv40_context *nv40) -{ - struct pipe_viewport_state *vpt = &nv40->viewport; - struct nouveau_stateobj *so; - - if (nv40->state.hw[NV40_STATE_VIEWPORT] && - !(nv40->dirty & NV40_NEW_VIEWPORT)) - return FALSE; - - so = so_new(2, 9, 0); - so_method(so, nv40->screen->curie, - NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - so_method(so, nv40->screen->curie, 0x1d78, 1); - so_data (so, 1); - - so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_viewport = { - .validate = nv40_state_viewport_validate, - .dirty = { - .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST, - .hw = NV40_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c deleted file mode 100644 index 9cbe7da6db..0000000000 --- a/src/gallium/drivers/nv40/nv40_state_zsa.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_zsa_validate(struct nv40_context *nv40) -{ - so_ref(nv40->zsa->so, - &nv40->state.hw[NV40_STATE_ZSA]); - return TRUE; -} - -struct nv40_state_entry nv40_state_zsa = { - .validate = nv40_state_zsa_validate, - .dirty = { - .pipe = NV40_NEW_ZSA, - .hw = NV40_STATE_ZSA - } -}; - -static boolean -nv40_state_sr_validate(struct nv40_context *nv40) -{ - struct nouveau_stateobj *so = so_new(2, 2, 0); - struct pipe_stencil_ref *sr = &nv40->stencil_ref; - - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nv40->state.hw[NV40_STATE_SR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_sr = { - .validate = nv40_state_sr_validate, - .dirty = { - .pipe = NV40_NEW_SR, - .hw = NV40_STATE_SR - } -}; diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c deleted file mode 100644 index 02ecfd7bbb..0000000000 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ /dev/null @@ -1,64 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "util/u_tile.h" - -#include "nv40_context.h" - -static void -nv40_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv04_surface_2d *eng2d = nv40->screen->eng2d; - - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} - -static void -nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv04_surface_2d *eng2d = nv40->screen->eng2d; - - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} - -void -nv40_init_surface_functions(struct nv40_context *nv40) -{ - nv40->pipe.surface_copy = nv40_surface_copy; - nv40->pipe.surface_fill = nv40_surface_fill; -} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c deleted file mode 100644 index fabdf4bf23..0000000000 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ /dev/null @@ -1,565 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" -#include "util/u_format.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" - -#define FORCE_SWTNL 0 - -static INLINE int -nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) -{ - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV40TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV40TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - return 0; -} - -static boolean -nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, - unsigned ib_size) -{ - struct pipe_screen *pscreen = &nv40->screen->base.base; - unsigned type; - - if (!ib) { - nv40->idxbuf = NULL; - nv40->idxbuf_format = 0xdeadbeef; - return FALSE; - } - - if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) - return FALSE; - - switch (ib_size) { - case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } - - if (ib != nv40->idxbuf || - type != nv40->idxbuf_format) { - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->idxbuf = ib; - nv40->idxbuf_format = type; - } - - return TRUE; -} - -static boolean -nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) -{ - struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; - unsigned type, ncomp; - void *map; - - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) - return FALSE; - - map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - map += vb->buffer_offset + ve->src_offset; - - switch (type) { - case NV40TCL_VTXFMT_TYPE_FLOAT: - { - float *v = map; - - switch (ncomp) { - case 4: - so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - so_data (so, fui(v[3])); - break; - case 3: - so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - break; - case 2: - so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - break; - case 1: - so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); - so_data (so, fui(v[0])); - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - } - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - - pipe_buffer_unmap(pscreen, vb->buffer); - - return TRUE; -} - -void -nv40_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned restart; - - nv40_vbo_set_idxbuf(nv40, NULL, 0); - if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); - return; - } - - while (count) { - unsigned vc, nr; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } - - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - while (vc) { - push = MIN2(vc, 2047); - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (chan, elts, push); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static void -nv40_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - void *map; - - map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } - - switch (ib_size) { - case 1: - nv40_draw_elements_u08(nv40, map, mode, start, count); - break; - case 2: - nv40_draw_elements_u16(nv40, map, mode, start, count); - break; - case 4: - nv40_draw_elements_u32(nv40, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - pipe_buffer_unmap(pscreen, ib); -} - -static void -nv40_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned restart; - - while (count) { - unsigned nr, vc; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } -} - -void -nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - boolean idxbuf; - - idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); - if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); - return; - } - - if (idxbuf) { - nv40_draw_elements_vbo(pipe, mode, start, count); - } else { - nv40_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); -} - -static boolean -nv40_vbo_validate(struct nv40_context *nv40) -{ - struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *curie = nv40->screen->curie; - struct pipe_buffer *ib = nv40->idxbuf; - unsigned ib_format = nv40->idxbuf_format; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - int hw; - - vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt->num_elements); - vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt->num_elements); - - for (hw = 0; hw < nv40->vtxelt->num_elements; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nv40->vtxelt->pipe[hw]; - vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->stride) { - if (!sattr) - sattr = so_new(16, 16 * 4, 0); - - if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - } - - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - nv40->fallback_swtnl |= NV40_NEW_ARRAYS; - so_ref(NULL, &vtxbuf); - so_ref(NULL, &vtxfmt); - return FALSE; - } - - so_reloc(vtxbuf, nouveau_bo(vb->buffer), - vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV40TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); - } - - if (ib) { - struct nouveau_bo *bo = nouveau_bo(ib); - - so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); - } - - so_method(vtxbuf, curie, 0x1710, 1); - so_data (vtxbuf, 0); - - so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); - so_ref(NULL, &vtxbuf); - nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); - so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); - so_ref(NULL, &vtxfmt); - nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); - so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); - so_ref(NULL, &sattr); - nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); - return FALSE; -} - -struct nv40_state_entry nv40_state_vbo = { - .validate = nv40_vbo_validate, - .dirty = { - .pipe = NV40_NEW_ARRAYS, - .hw = 0, - } -}; - diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 0eb42f323f..aa14e17872 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -97,6 +97,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50_init_surface_functions(nv50); nv50_init_state_functions(nv50); nv50_init_query_functions(nv50); + nv50_init_transfer_functions(nv50); nv50->draw = draw_create(); assert(nv50->draw); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 8793c2aac5..1743f6fb39 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -175,6 +175,7 @@ nv50_context(struct pipe_context *pipe) extern void nv50_init_surface_functions(struct nv50_context *nv50); extern void nv50_init_state_functions(struct nv50_context *nv50); extern void nv50_init_query_functions(struct nv50_context *nv50); +extern void nv50_init_transfer_functions(struct nv50_context *nv50); extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 7e2e8aa336..adf0d3b374 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -234,7 +234,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->context_create = nv50_create; nv50_screen_init_miptree_functions(pscreen); - nv50_transfer_init_screen_functions(pscreen); /* DMA engine object */ ret = nouveau_grobj_alloc(chan, 0xbeef5039, diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index d1bc80cb9e..ec19ea655b 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -38,6 +38,4 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } -void nv50_transfer_init_screen_functions(struct pipe_screen *); - #endif diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 4c48b12cd8..eb64f6cee4 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -137,7 +137,8 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so, so_data (so, nv50_tex_format_list[i].hw); so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0); - so_data (so, mode); + so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RD | NOUVEAU_BO_OR, mode, mode); so_data (so, 0x00300000); so_data (so, mt->base.base.width0 | (1 << 31)); so_data (so, (mt->base.base.last_level << 28) | diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 7c360e9e73..9eb223eca6 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -121,11 +121,12 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, } static struct pipe_transfer * -nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +nv50_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, unsigned w, unsigned h) { + struct pipe_screen *pscreen = pcontext->screen; struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; @@ -186,7 +187,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, } static void -nv50_transfer_del(struct pipe_transfer *ptx) +nv50_transfer_del(struct pipe_context *pcontext, struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); @@ -196,7 +197,7 @@ nv50_transfer_del(struct pipe_transfer *ptx) unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height); if (ptx->usage & PIPE_TRANSFER_WRITE) { - struct pipe_screen *pscreen = pt->screen; + struct pipe_screen *pscreen = pcontext->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, @@ -218,7 +219,7 @@ nv50_transfer_del(struct pipe_transfer *ptx) } static void * -nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nv50_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; unsigned flags = 0; @@ -236,7 +237,7 @@ nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) } static void -nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nv50_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; @@ -244,12 +245,12 @@ nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) } void -nv50_transfer_init_screen_functions(struct pipe_screen *pscreen) +nv50_init_transfer_functions(struct nv50_context *nv50) { - pscreen->get_tex_transfer = nv50_transfer_new; - pscreen->tex_transfer_destroy = nv50_transfer_del; - pscreen->transfer_map = nv50_transfer_map; - pscreen->transfer_unmap = nv50_transfer_unmap; + nv50->pipe.get_tex_transfer = nv50_transfer_new; + nv50->pipe.tex_transfer_destroy = nv50_transfer_del; + nv50->pipe.transfer_map = nv50_transfer_map; + nv50->pipe.transfer_unmap = nv50_transfer_unmap; } void diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 6b9c1ee231..5047286806 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -549,6 +549,7 @@ nv50_vbo_validate(struct nv50_context *nv50) if (nv50->vtxbuf_nr == 0) return NULL; + nv50->vbo_fifo = 0; if (nv50->screen->force_push || nv50->vertprog->cfg.edgeflag_in < 16) nv50->vbo_fifo = 0xffff; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile new file mode 100644 index 0000000000..dfe97e6ed5 --- /dev/null +++ b/src/gallium/drivers/nvfx/Makefile @@ -0,0 +1,32 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nvfx + +C_SOURCES = \ + nv04_surface_2d.c \ + nvfx_context.c \ + nvfx_clear.c \ + nvfx_draw.c \ + nvfx_fragprog.c \ + nvfx_fragtex.c \ + nv30_fragtex.c \ + nv40_fragtex.c \ + nvfx_miptree.c \ + nvfx_query.c \ + nvfx_screen.c \ + nvfx_state.c \ + nvfx_state_blend.c \ + nvfx_state_emit.c \ + nvfx_state_fb.c \ + nvfx_state_rasterizer.c \ + nvfx_state_scissor.c \ + nvfx_state_stipple.c \ + nvfx_state_viewport.c \ + nvfx_state_zsa.c \ + nvfx_surface.c \ + nvfx_transfer.c \ + nvfx_vbo.c \ + nvfx_vertprog.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c index b074547c4d..ed18c9f24d 100644 --- a/src/gallium/drivers/nouveau/nv04_surface_2d.c +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.c @@ -518,7 +518,6 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; } - struct nv40_screen* screen = (struct nv40_screen*)pscreen; ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; struct pipe_texture templ; @@ -544,4 +543,3 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d return temp_ns; } - diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h index ce696a11a3..ce696a11a3 100644 --- a/src/gallium/drivers/nouveau/nv04_surface_2d.h +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.h diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index f7d98f3f20..2b56f45492 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -1,7 +1,37 @@ #include "util/u_format.h" -#include "nv30_context.h" +#include "nvfx_context.h" #include "nouveau/nouveau_util.h" +#include "nvfx_tex.h" + +void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 8) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ { \ @@ -57,11 +87,11 @@ nv30_fragtex_format(uint pipe_format) } -static struct nouveau_stateobj * -nv30_fragtex_build(struct nv30_context *nv30, int unit) +struct nouveau_stateobj * +nv30_fragtex_build(struct nvfx_context *nvfx, int unit) { - struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; - struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit]; struct pipe_texture *pt = &nv30mt->base; struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); struct nv30_texture_format *tf; @@ -101,7 +131,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; so = so_new(1, 8, 2); - so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); @@ -115,47 +145,3 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) return so; } - -static boolean -nv30_fragtex_validate(struct nv30_context *nv30) -{ - struct nv30_fragment_program *fp = nv30->fragprog; - struct nv30_state *state = &nv30->state; - struct nouveau_stateobj *so; - unsigned samplers, unit; - - samplers = state->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = so_new(1, 1, 0); - so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); - } - - samplers = nv30->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = nv30_fragtex_build(nv30, unit); - so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); - } - - nv30->state.fp_samplers = fp->samplers; - return FALSE; -} - -struct nv30_state_entry nv30_state_fragtex = { - .validate = nv30_fragtex_validate, - .dirty = { - .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h new file mode 100644 index 0000000000..ec0444c07f --- /dev/null +++ b/src/gallium/drivers/nvfx/nv30_vertprog.h @@ -0,0 +1,169 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +#include "nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index b60118922a..5889b5e40d 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -1,18 +1,63 @@ #include "util/u_format.h" +#include "nvfx_context.h" +#include "nvfx_tex.h" -#include "nv40_context.h" +void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 2) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ { \ TRUE, \ PIPE_FORMAT_##m, \ NV40TCL_TEX_FORMAT_FORMAT_##tf, \ - (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ - NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ - NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ - NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ - ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \ - (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ + ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \ + (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \ } struct nv40_texture_format { @@ -60,11 +105,11 @@ nv40_fragtex_format(uint pipe_format) } -static struct nouveau_stateobj * -nv40_fragtex_build(struct nv40_context *nv40, int unit) +struct nouveau_stateobj * +nv40_fragtex_build(struct nvfx_context *nvfx, int unit) { - struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; - struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit]; struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer); struct pipe_texture *pt = &nv40mt->base; struct nv40_texture_format *tf; @@ -81,20 +126,20 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); if (1) /* XXX */ - txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + txf |= NV34TCL_TX_FORMAT_NO_BORDER; switch (pt->target) { case PIPE_TEXTURE_CUBE: - txf |= NV40TCL_TEX_FORMAT_CUBIC; + txf |= NV34TCL_TX_FORMAT_CUBIC; /* fall-through */ case PIPE_TEXTURE_2D: - txf |= NV40TCL_TEX_FORMAT_DIMS_2D; + txf |= NV34TCL_TX_FORMAT_DIMS_2D; break; case PIPE_TEXTURE_3D: - txf |= NV40TCL_TEX_FORMAT_DIMS_3D; + txf |= NV34TCL_TX_FORMAT_DIMS_3D; break; case PIPE_TEXTURE_1D: - txf |= NV40TCL_TEX_FORMAT_DIMS_1D; + txf |= NV34TCL_TX_FORMAT_DIMS_1D; break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); @@ -111,63 +156,19 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; so = so_new(2, 9, 2); - so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); so_data (so, ps->wrap); so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0); so_data (so, ps->bcol); - so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1); so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); return so; } - -static boolean -nv40_fragtex_validate(struct nv40_context *nv40) -{ - struct nv40_fragment_program *fp = nv40->fragprog; - struct nv40_state *state = &nv40->state; - struct nouveau_stateobj *so; - unsigned samplers, unit; - - samplers = state->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = so_new(1, 1, 0); - so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); - } - - samplers = nv40->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = nv40_fragtex_build(nv40, unit); - so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); - } - - nv40->state.fp_samplers = fp->samplers; - return FALSE; -} - -struct nv40_state_entry nv40_state_fragtex = { - .validate = nv40_fragtex_validate, - .dirty = { - .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, - .hw = 0 - } -}; - diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h new file mode 100644 index 0000000000..7337293bab --- /dev/null +++ b/src/gallium/drivers/nvfx/nv40_vertprog.h @@ -0,0 +1,177 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + +#include "nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nvfx/nvfx_clear.c index ddf13addf3..2be70fcee4 100644 --- a/src/gallium/drivers/nv40/nv40_clear.c +++ b/src/gallium/drivers/nvfx/nvfx_clear.c @@ -3,12 +3,12 @@ #include "pipe/p_state.h" #include "util/u_clear.h" -#include "nv40_context.h" +#include "nvfx_context.h" void -nv40_clear(struct pipe_context *pipe, unsigned buffers, +nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil) { - util_clear(pipe, &nv40_context(pipe)->framebuffer, buffers, rgba, depth, + util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth, stencil); } diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c new file mode 100644 index 0000000000..fc3cbdb558 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -0,0 +1,90 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" + +#include "nvfx_context.h" +#include "nvfx_screen.h" + +static void +nvfx_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING (chan, 1); + } + + FIRE_RING(chan); + if (fence) + *fence = NULL; +} + +static void +nvfx_destroy(struct pipe_context *pipe) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned i; + + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (nvfx->state.hw[i]) + so_ref(NULL, &nvfx->state.hw[i]); + } + + if (nvfx->draw) + draw_destroy(nvfx->draw); + FREE(nvfx); +} + +struct pipe_context * +nvfx_create(struct pipe_screen *pscreen, void *priv) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nvfx_context *nvfx; + struct nouveau_winsys *nvws = screen->nvws; + + nvfx = CALLOC(1, sizeof(struct nvfx_context)); + if (!nvfx) + return NULL; + nvfx->screen = screen; + + nvfx->nvws = nvws; + + nvfx->pipe.winsys = ws; + nvfx->pipe.screen = pscreen; + nvfx->pipe.priv = priv; + nvfx->pipe.destroy = nvfx_destroy; + nvfx->pipe.draw_arrays = nvfx_draw_arrays; + nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.clear = nvfx_clear; + nvfx->pipe.flush = nvfx_flush; + + nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; + + screen->base.channel->user_private = nvfx; + screen->base.channel->flush_notify = nvfx_state_flush_notify; + + nvfx->is_nv4x = screen->is_nv4x; + + nvfx_init_query_functions(nvfx); + nvfx_init_surface_functions(nvfx); + nvfx_init_state_functions(nvfx); + nvfx_init_transfer_functions(nvfx); + + /* Create, configure, and install fallback swtnl path */ + nvfx->draw = draw_create(); + draw_wide_point_threshold(nvfx->draw, 9999999.0); + draw_wide_line_threshold(nvfx->draw, 9999999.0); + draw_enable_line_stipple(nvfx->draw, FALSE); + draw_enable_point_sprites(nvfx->draw, FALSE); + draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); + + return &nvfx->pipe; +} diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h new file mode 100644 index 0000000000..5eed8a560e --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -0,0 +1,264 @@ +#ifndef __NVFX_CONTEXT_H__ +#define __NVFX_CONTEXT_H__ + +#include <stdio.h> + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nvfx_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nvfx_state_index { + NVFX_STATE_FB = 0, + NVFX_STATE_VIEWPORT = 1, + NVFX_STATE_BLEND = 2, + NVFX_STATE_RAST = 3, + NVFX_STATE_ZSA = 4, + NVFX_STATE_BCOL = 5, + NVFX_STATE_CLIP = 6, + NVFX_STATE_SCISSOR = 7, + NVFX_STATE_STIPPLE = 8, + NVFX_STATE_FRAGPROG = 9, + NVFX_STATE_VERTPROG = 10, + NVFX_STATE_FRAGTEX0 = 11, + NVFX_STATE_FRAGTEX1 = 12, + NVFX_STATE_FRAGTEX2 = 13, + NVFX_STATE_FRAGTEX3 = 14, + NVFX_STATE_FRAGTEX4 = 15, + NVFX_STATE_FRAGTEX5 = 16, + NVFX_STATE_FRAGTEX6 = 17, + NVFX_STATE_FRAGTEX7 = 18, + NVFX_STATE_FRAGTEX8 = 19, + NVFX_STATE_FRAGTEX9 = 20, + NVFX_STATE_FRAGTEX10 = 21, + NVFX_STATE_FRAGTEX11 = 22, + NVFX_STATE_FRAGTEX12 = 23, + NVFX_STATE_FRAGTEX13 = 24, + NVFX_STATE_FRAGTEX14 = 25, + NVFX_STATE_FRAGTEX15 = 26, + NVFX_STATE_VERTTEX0 = 27, + NVFX_STATE_VERTTEX1 = 28, + NVFX_STATE_VERTTEX2 = 29, + NVFX_STATE_VERTTEX3 = 30, + NVFX_STATE_VTXBUF = 31, + NVFX_STATE_VTXFMT = 32, + NVFX_STATE_VTXATTR = 33, + NVFX_STATE_SR = 34, + NVFX_STATE_MAX = 35 +}; + +#include "nvfx_screen.h" + +#define NVFX_NEW_BLEND (1 << 0) +#define NVFX_NEW_RAST (1 << 1) +#define NVFX_NEW_ZSA (1 << 2) +#define NVFX_NEW_SAMPLER (1 << 3) +#define NVFX_NEW_FB (1 << 4) +#define NVFX_NEW_STIPPLE (1 << 5) +#define NVFX_NEW_SCISSOR (1 << 6) +#define NVFX_NEW_VIEWPORT (1 << 7) +#define NVFX_NEW_BCOL (1 << 8) +#define NVFX_NEW_VERTPROG (1 << 9) +#define NVFX_NEW_FRAGPROG (1 << 10) +#define NVFX_NEW_ARRAYS (1 << 11) +#define NVFX_NEW_UCP (1 << 12) +#define NVFX_NEW_SR (1 << 13) + +struct nvfx_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nvfx_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nvfx_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nvfx_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NVFX_STATE_MAX]; +}; + +struct nvfx_vtxelt_state { + struct pipe_vertex_element pipe[16]; + unsigned num_elements; +}; + +struct nvfx_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nvfx_screen *screen; + + unsigned is_nv4x; /* either 0 or ~0 */ + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nvfx_state state; + struct { + struct nvfx_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; + + /* Context state */ + unsigned dirty, draw_dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nvfx_vertex_program *vertprog; + struct nvfx_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nvfx_rasterizer_state *rasterizer; + struct nvfx_zsa_state *zsa; + struct nvfx_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nvfx_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct nvfx_vtxelt_state *vtxelt; +}; + +static INLINE struct nvfx_context * +nvfx_context(struct pipe_context *pipe) +{ + return (struct nvfx_context *)pipe; +} + +struct nvfx_state_entry { + boolean (*validate)(struct nvfx_context *nvfx); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern struct nvfx_state_entry nvfx_state_blend; +extern struct nvfx_state_entry nvfx_state_blend_colour; +extern struct nvfx_state_entry nvfx_state_fragprog; +extern struct nvfx_state_entry nvfx_state_fragtex; +extern struct nvfx_state_entry nvfx_state_framebuffer; +extern struct nvfx_state_entry nvfx_state_rasterizer; +extern struct nvfx_state_entry nvfx_state_scissor; +extern struct nvfx_state_entry nvfx_state_sr; +extern struct nvfx_state_entry nvfx_state_stipple; +extern struct nvfx_state_entry nvfx_state_vbo; +extern struct nvfx_state_entry nvfx_state_vertprog; +extern struct nvfx_state_entry nvfx_state_viewport; +extern struct nvfx_state_entry nvfx_state_vtxfmt; +extern struct nvfx_state_entry nvfx_state_zsa; + +extern void nvfx_init_query_functions(struct nvfx_context *nvfx); +extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); + +/* nvfx_context.c */ +struct pipe_context * +nvfx_create(struct pipe_screen *pscreen, void *priv); + +/* nvfx_clear.c */ +extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil); + +/* nvfx_draw.c */ +extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); +extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + +/* nvfx_fragprog.c */ +extern void nvfx_fragprog_destroy(struct nvfx_context *, + struct nvfx_fragment_program *); + +/* nv30_fragtex.c */ +extern void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); +extern void nv30_fragtex_bind(struct nvfx_context *); +extern struct nouveau_stateobj * +nv30_fragtex_build(struct nvfx_context *nvfx, int unit); + +/* nv40_fragtex.c */ +extern void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); +extern void nv40_fragtex_bind(struct nvfx_context *); +extern struct nouveau_stateobj * +nv40_fragtex_build(struct nvfx_context *nvfx, int unit); + +/* nvfx_state.c */ +extern void nvfx_init_state_functions(struct nvfx_context *nvfx); + +/* nvfx_state_emit.c */ +extern void nvfx_state_flush_notify(struct nouveau_channel *chan); +extern boolean nvfx_state_validate(struct nvfx_context *nvfx); +extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx); +extern void nvfx_state_emit(struct nvfx_context *nvfx); + +/* nvfx_transfer.c */ +extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); + +/* nvfx_vbo.c */ +extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern void nvfx_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nvfx_vertprog.c */ +extern void nvfx_vertprog_destroy(struct nvfx_context *, + struct nvfx_vertex_program *); + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c new file mode 100644 index 0000000000..5379b29efd --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -0,0 +1,350 @@ +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" +#include "tgsi/tgsi_ureg.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nvfx_context.h" +#include "nv30_vertprog.h" +#include "nv40_vertprog.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nvfx_render_stage { + struct draw_stage stage; + struct nvfx_context *nvfx; + unsigned prim; +}; + +static INLINE struct nvfx_render_stage * +nvfx_render_stage(struct draw_stage *stage) +{ + return (struct nvfx_render_stage *)stage; +} + +static INLINE void +nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { + unsigned idx = nvfx->swtnl.draw[i]; + unsigned hw = nvfx->swtnl.hw[i]; + + switch (nvfx->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); + break; + case 0xff: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0] / v->data[idx][3])); + OUT_RING (chan, fui(v->data[idx][1] / v->data[idx][3])); + OUT_RING (chan, fui(v->data[idx][2] / v->data[idx][3])); + OUT_RING (chan, fui(1.0f / v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(chan); + nvfx_state_emit(nvfx); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nvfx_render_vertex(nvfx, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1); +} + +static void +nvfx_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2); +} + +static void +nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); +} + +static void +nvfx_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(draw); + struct nvfx_context *nvfx = rs->nvfx; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nvfx_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static struct nvfx_vertex_program * +nvfx_create_drawvp(struct nvfx_context *nvfx) +{ + struct ureg_program *ureg; + uint i; + + ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + if (ureg == NULL) + return NULL; + + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4)); + ureg_MOV(ureg, + ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X), + ureg_DECL_vs_input(ureg, 5)); + for (i = 0; i < 8; ++i) + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i)); + + ureg_END( ureg ); + + return ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); +} + +struct draw_stage * +nvfx_draw_render_stage(struct nvfx_context *nvfx) +{ + struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); + + if (!nvfx->swtnl.vertprog) + nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx); + + render->nvfx = nvfx; + render->stage.draw = nvfx->draw; + render->stage.point = nvfx_render_point; + render->stage.line = nvfx_render_line; + render->stage.tri = nvfx_render_tri; + render->stage.flush = nvfx_render_flush; + render->stage.reset_stipple_counter = nvfx_render_reset_stipple_counter; + render->stage.destroy = nvfx_render_destroy; + + return &render->stage; +} + +void +nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct pipe_screen *pscreen = pipe->screen; + unsigned i; + void *map; + + if (!nvfx_state_validate_swtnl(nvfx)) + return; + nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); + nvfx_state_emit(nvfx); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) { + map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nvfx->draw, i, map); + } + + if (idxbuf) { + map = pipe_buffer_map(pscreen, idxbuf, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nvfx->draw, 0, NULL); + } + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = pipe_buffer_map(pscreen, + nvfx->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, + map, nr); + } + + draw_arrays(nvfx->draw, mode, start, count); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) + pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer); + + if (idxbuf) + pipe_buffer_unmap(pscreen, idxbuf); + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) + pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nvfx->draw); + pipe->flush(pipe, 0, NULL); +} + +static INLINE void +emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); + unsigned a = nvfx->swtnl.nr_attribs++; + + nvfx->swtnl.hw[a] = hw; + nvfx->swtnl.emit[a] = emit; + nvfx->swtnl.draw[a] = draw_out; +} + +static boolean +nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nvfx->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nvfx_state_entry nvfx_state_vtxfmt = { + .validate = nvfx_state_vtxfmt_validate, + .dirty = { + .pipe = NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index dc24f9b08a..76351430f4 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -7,37 +7,20 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "nv40_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV40_FP_OP_COND_TR -#include "nv40_shader.h" - -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) -#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) +#include "nvfx_context.h" +#include "nvfx_shader.h" #define MAX_CONSTS 128 #define MAX_IMM 32 -struct nv40_fpc { - struct nv40_fragment_program *fp; +struct nvfx_fpc { + struct nvfx_fragment_program *fp; uint attrib_map[PIPE_MAX_SHADER_INPUTS]; unsigned r_temps; unsigned r_temps_discard; - struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nv40_sreg *r_temp; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_temp; int num_regs; @@ -50,35 +33,35 @@ struct nv40_fpc { } consts[MAX_CONSTS]; int nr_consts; - struct nv40_sreg imm[MAX_IMM]; + struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; }; -static INLINE struct nv40_sreg -temp(struct nv40_fpc *fpc) +static INLINE struct nvfx_sreg +temp(struct nvfx_fpc *fpc) { int idx = ffs(~fpc->r_temps) - 1; if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nv40_sr(NV40SR_TEMP, 0); + return nvfx_sr(NVFXSR_TEMP, 0); } fpc->r_temps |= (1 << idx); fpc->r_temps_discard |= (1 << idx); - return nv40_sr(NV40SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } static INLINE void -release_temps(struct nv40_fpc *fpc) +release_temps(struct nvfx_fpc *fpc) { fpc->r_temps &= ~fpc->r_temps_discard; fpc->r_temps_discard = 0; } -static INLINE struct nv40_sreg -constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +static INLINE struct nvfx_sreg +constant(struct nvfx_fpc *fpc, int pipe, float vals[4]) { int idx; @@ -89,45 +72,45 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4]) fpc->consts[idx].pipe = pipe; if (pipe == -1) memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ (d), (m), (s0), (s1), (s2)) #define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ (d), (m), (s0), none, none) static void -grow_insns(struct nv40_fpc *fpc, int size) +grow_insns(struct nvfx_fpc *fpc, int size) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; fp->insn_len += size; fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); } static void -emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; uint32_t sr = 0; switch (src.type) { - case NV40SR_INPUT: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); break; - case NV40SR_OUTPUT: - sr |= NV40_FP_REG_SRC_HALF; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; /* fall-through */ - case NV40SR_TEMP: - sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; - case NV40SR_CONST: + case NVFXSR_CONST: if (!fpc->have_const) { grow_insns(fpc, 4); fpc->have_const = 1; @@ -135,7 +118,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) hw = &fp->insn[fpc->inst_offset]; if (fpc->consts[src.index].pipe >= 0) { - struct nv40_fragment_program_data *fpd; + struct nvfx_fragment_program_data *fpd; fp->consts = realloc(fp->consts, ++fp->nr_consts * sizeof(*fpd)); @@ -149,63 +132,63 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); break; - case NV40SR_NONE: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); break; default: assert(0); } if (src.negate) - sr |= NV40_FP_REG_NEGATE; + sr |= NVFX_FP_REG_NEGATE; if (src.abs) hw[1] |= (1 << (29 + pos)); - sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); hw[pos + 1] |= sr; } static void -emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; switch (dst.type) { - case NV40SR_TEMP: + case NVFXSR_TEMP: if (fpc->num_regs < (dst.index + 1)) fpc->num_regs = dst.index + 1; break; - case NV40SR_OUTPUT: + case NVFXSR_OUTPUT: if (dst.index == 1) { fp->fp_control |= 0xe; } else { - hw[0] |= NV40_FP_OP_OUT_REG_HALF; + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; } break; - case NV40SR_NONE: + case NVFXSR_NONE: hw[0] |= (1 << 30); break; default: assert(0); } - hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); } static void -nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw; fpc->inst_offset = fp->insn_len; @@ -214,22 +197,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); - if (op == NV40_FP_OP_OPCODE_KIL) - fp->fp_control |= NV40TCL_FP_CONTROL_KIL; - hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + if (op == NVFX_FP_OP_OPCODE_KIL) + fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; + hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); if (sat) - hw[0] |= NV40_FP_OP_OUT_SAT; + hw[0] |= NVFX_FP_OP_OUT_SAT; if (dst.cc_update) - hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); emit_dst(fpc, dst); emit_src(fpc, 0, s0); @@ -238,26 +221,26 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, } static void -nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; - nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); fp->samplers |= (1 << unit); } -static INLINE struct nv40_sreg -tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, + src = nvfx_sr(NVFXSR_INPUT, fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: @@ -288,18 +271,18 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) return src; } -static INLINE struct nv40_sreg -tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: return fpc->r_result[fdst->Register.Index]; case TGSI_FILE_TEMPORARY: return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: - return nv40_sr(NV40SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); default: NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nv40_sr(NV40SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); } } @@ -308,52 +291,19 @@ tgsi_mask(uint tgsi) { int mask = 0; - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W; return mask; } static boolean -src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) -{ - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, +nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_instruction *finst) { - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg src[3], dst, tmp; + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; int mask, sat, unit; int ai = -1, ci = -1, ii = -1; int i; @@ -377,23 +327,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->Register.Index) { ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -404,7 +343,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -415,7 +354,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -445,25 +384,25 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = nv40_sr(NV40SR_NONE, 0); + tmp = nvfx_sr(NVFXSR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV40_VP_INST_COND_GE; + dst.cc_test = NVFX_COND_GE; arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NV40_VP_INST_COND_LT; + dst.cc_test = NVFX_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; case TGSI_OPCODE_COS: arith(fpc, sat, COS, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DDX: - if (mask & (MASK_Z | MASK_W)) { + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { tmp = temp(fpc); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none); arith(fpc, 0, MOV, dst, mask, tmp, none, none); } else { @@ -471,13 +410,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } break; case TGSI_OPCODE_DDY: - if (mask & (MASK_Z | MASK_W)) { + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { tmp = temp(fpc); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none); arith(fpc, 0, MOV, dst, mask, tmp, none, none); } else { @@ -492,7 +431,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_OPCODE_DPH: tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none); arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none); break; @@ -512,10 +451,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, 0, KIL, none, 0, none, none, none); break; case TGSI_OPCODE_KIL: - dst = nv40_sr(NV40SR_NONE, 0); + dst = nvfx_sr(NVFXSR_NONE, 0); dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NVFX_COND_LT; arith(fpc, 0, KIL, dst, 0, none, none, none); break; case TGSI_OPCODE_LG2: @@ -523,9 +462,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; // case TGSI_OPCODE_LIT: case TGSI_OPCODE_LRP: - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + if(!nvfx->is_nv4x) + arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); + else { + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + } break; case TGSI_OPCODE_MAD: arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); @@ -543,13 +486,17 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); + if(!nvfx->is_nv4x) + arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); + else { + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + } break; case TGSI_OPCODE_RCP: arith(fpc, sat, RCP, dst, mask, src[0], none, none); @@ -558,42 +505,50 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, assert(0); break; case TGSI_OPCODE_RFL: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + if(!nvfx->is_nv4x) + arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); + else { + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + } break; case TGSI_OPCODE_RSQ: - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); + if(!nvfx->is_nv4x) + arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + else { + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + } break; case TGSI_OPCODE_SCS: /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) + if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X) { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, + if (mask & NVFX_FP_MASK_X) { + arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, + if (mask & NVFX_FP_MASK_Y) { + arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none); } } else { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, + if (mask & NVFX_FP_MASK_Y) { + arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none); } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, + if (mask & NVFX_FP_MASK_X) { + arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); } } @@ -641,7 +596,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, tmp = temp(fpc); arith(fpc, 0, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -655,32 +610,32 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } static boolean -nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, +nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_declaration *fdec) { int hw; switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV40_FP_OP_INPUT_SRC_POSITION; + hw = NVFX_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_FP_OP_INPUT_SRC_COL0; + hw = NVFX_FP_OP_INPUT_SRC_COL0; } else if (fdec->Semantic.Index == 1) { - hw = NV40_FP_OP_INPUT_SRC_COL1; + hw = NVFX_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV40_FP_OP_INPUT_SRC_FOGC; + hw = NVFX_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: if (fdec->Semantic.Index <= 7) { - hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. + hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); @@ -697,7 +652,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, } static boolean -nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, +nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_declaration *fdec) { unsigned idx = fdec->Range.First; @@ -708,12 +663,14 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, hw = 1; break; case TGSI_SEMANTIC_COLOR: + hw = ~0; switch (fdec->Semantic.Index) { case 0: hw = 0; break; case 1: hw = 2; break; case 2: hw = 3; break; case 3: hw = 4; break; - default: + } + if(hw > ((nvfx->is_nv4x) ? 4 : 2)) { NOUVEAU_ERR("bad rcol index\n"); return FALSE; } @@ -723,13 +680,13 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, return FALSE; } - fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); fpc->r_temps |= (1 << hw); return TRUE; } static boolean -nv40_fragprog_prepare(struct nv40_fpc *fpc) +nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) { struct tgsi_parse_context p; int high_temp = -1, i; @@ -746,11 +703,11 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_INPUT: - if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec)) goto out_err; break; case TGSI_FILE_OUTPUT: - if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) goto out_err; break; case TGSI_FILE_TEMPORARY: @@ -787,7 +744,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) tgsi_parse_free(&p); if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) fpc->r_temp[i] = temp(fpc); fpc->r_temps_discard = 0; @@ -803,19 +760,19 @@ out_err: } static void -nv40_fragprog_translate(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +nvfx_fragprog_translate(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { struct tgsi_parse_context parse; - struct nv40_fpc *fpc = NULL; + struct nvfx_fpc *fpc = NULL; - fpc = CALLOC(1, sizeof(struct nv40_fpc)); + fpc = CALLOC(1, sizeof(struct nvfx_fpc)); if (!fpc) return; fpc->fp = fp; fpc->num_regs = 2; - if (!nv40_fragprog_prepare(fpc)) { + if (!nvfx_fragprog_prepare(nvfx, fpc)) { FREE(fpc); return; } @@ -831,7 +788,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, const struct tgsi_full_instruction *finst; finst = &parse.FullToken.FullInstruction; - if (!nv40_fragprog_parse_instruction(fpc, finst)) + if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst)) goto out_err; } break; @@ -840,7 +797,10 @@ nv40_fragprog_translate(struct nv40_context *nv40, } } - fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + if(!nvfx->is_nv4x) + fp->fp_control |= (fpc->num_regs-1)/2; + else + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; /* Terminate final instruction */ fp->insn[fpc->inst_offset] |= 0x00000001; @@ -862,10 +822,10 @@ out_err: } static void -nv40_fragprog_upload(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +nvfx_fragprog_upload(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { - struct pipe_screen *pscreen = nv40->pipe.screen; + struct pipe_screen *pscreen = nvfx->pipe.screen; const uint32_t le = 1; uint32_t *map; int i; @@ -896,12 +856,12 @@ nv40_fragprog_upload(struct nv40_context *nv40, } static boolean -nv40_fragprog_validate(struct nv40_context *nv40) +nvfx_fragprog_validate(struct nvfx_context *nvfx) { - struct nv40_fragment_program *fp = nv40->fragprog; + struct nvfx_fragment_program *fp = nvfx->fragprog; struct pipe_buffer *constbuf = - nv40->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nv40->pipe.screen; + nvfx->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_screen *pscreen = nvfx->pipe.screen; struct nouveau_stateobj *so; boolean new_consts = FALSE; int i; @@ -909,24 +869,31 @@ nv40_fragprog_validate(struct nv40_context *nv40) if (fp->translated) goto update_constants; - nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; - nv40_fragprog_translate(nv40, fp); + nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG; + nvfx_fragprog_translate(nvfx, fp); if (!fp->translated) { - nv40->fallback_swrast |= NV40_NEW_FRAGPROG; + nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG; return FALSE; } fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv40_fragprog_upload(nv40, fp); + nvfx_fragprog_upload(nvfx, fp); - so = so_new(2, 2, 1); - so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); + so = so_new(4, 4, 1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0, - NV40TCL_FP_ADDRESS_DMA1); - so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); + NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, + NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); + if(!nvfx->is_nv4x) { + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, (1<<16)|0x4); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1); + so_data (so, fp->samplers); + } + so_ref(so, &fp->so); so_ref(NULL, &so); @@ -937,7 +904,7 @@ update_constants: map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { - struct nv40_fragment_program_data *fpd = &fp->consts[i]; + struct nvfx_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; @@ -949,11 +916,11 @@ update_constants: pipe_buffer_unmap(pscreen, constbuf); if (new_consts) - nv40_fragprog_upload(nv40, fp); + nvfx_fragprog_upload(nvfx, fp); } - if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { - so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); + if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { + so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); return TRUE; } @@ -961,8 +928,8 @@ update_constants: } void -nv40_fragprog_destroy(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +nvfx_fragprog_destroy(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { if (fp->buffer) pipe_buffer_reference(&fp->buffer, NULL); @@ -974,11 +941,10 @@ nv40_fragprog_destroy(struct nv40_context *nv40, FREE(fp->insn); } -struct nv40_state_entry nv40_state_fragprog = { - .validate = nv40_fragprog_validate, +struct nvfx_state_entry nvfx_state_fragprog = { + .validate = nvfx_fragprog_validate, .dirty = { - .pipe = NV40_NEW_FRAGPROG, - .hw = NV40_STATE_FRAGPROG + .pipe = NVFX_NEW_FRAGPROG, + .hw = NVFX_STATE_FRAGPROG } }; - diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c new file mode 100644 index 0000000000..84e4eb1004 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -0,0 +1,49 @@ +#include "nvfx_context.h" + +static boolean +nvfx_fragtex_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + struct nvfx_state *state = &nvfx->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(1, 1, 0); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + } + + samplers = nvfx->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + if(!nvfx->is_nv4x) + so = nv30_fragtex_build(nvfx, unit); + else + so = nv40_fragtex_build(nvfx, unit); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + } + + nvfx->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nvfx_state_entry nvfx_state_fragtex = { + .validate = nvfx_fragtex_validate, + .dirty = { + .pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 62e97bcea4..0f5ed61aab 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -4,13 +4,13 @@ #include "util/u_format.h" #include "util/u_math.h" -#include "nv40_context.h" -#include "../nouveau/nv04_surface_2d.h" +#include "nvfx_context.h" +#include "nv04_surface_2d.h" static void -nv40_miptree_layout(struct nv40_miptree *mt) +nvfx_miptree_layout(struct nvfx_miptree *mt) { struct pipe_texture *pt = &mt->base; uint width = pt->width0; @@ -62,13 +62,13 @@ nv40_miptree_layout(struct nv40_miptree *mt) } static struct pipe_texture * -nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { - struct nv40_miptree *mt; + struct nvfx_miptree *mt; unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | NOUVEAU_BUFFER_USAGE_TEXTURE; - mt = MALLOC(sizeof(struct nv40_miptree)); + mt = MALLOC(sizeof(struct nvfx_miptree)); if (!mt) return NULL; mt->base = *pt; @@ -89,6 +89,18 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else { switch (pt->format) { + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + /* TODO: we can actually swizzle these formats on nv40, we + are just preserving the pre-unification behavior. + The whole 2D code is going to be rewritten anyway. */ + if(nvfx_screen(pscreen)->is_nv4x) { + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + break; + } /* TODO: Figure out which formats can be swizzled */ case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8X8_UNORM: @@ -112,7 +124,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - nv40_miptree_layout(mt); + nvfx_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); if (!mt->buffer) { @@ -124,17 +136,17 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) } static struct pipe_texture * -nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +nvfx_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, const unsigned *stride, struct pipe_buffer *pb) { - struct nv40_miptree *mt; + struct nvfx_miptree *mt; /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || pt->depth0 != 1) return NULL; - mt = CALLOC_STRUCT(nv40_miptree); + mt = CALLOC_STRUCT(nvfx_miptree); if (!mt) return NULL; @@ -153,9 +165,9 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, } static void -nv40_miptree_destroy(struct pipe_texture *pt) +nvfx_miptree_destroy(struct pipe_texture *pt) { - struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; int l; pipe_buffer_reference(&mt->buffer, NULL); @@ -168,11 +180,11 @@ nv40_miptree_destroy(struct pipe_texture *pt) } static struct pipe_surface * -nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; struct nv04_surface *ns; ns = CALLOC_STRUCT(nv04_surface); @@ -202,21 +214,21 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base; return &ns->base; } static void -nv40_miptree_surface_del(struct pipe_surface *ps) +nvfx_miptree_surface_del(struct pipe_surface *ps) { struct nv04_surface* ns = (struct nv04_surface*)ps; if(ns->backing) { - struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv40_miptree_surface_del(&ns->backing->base); + nvfx_miptree_surface_del(&ns->backing->base); } pipe_texture_reference(&ps->texture, NULL); @@ -224,13 +236,12 @@ nv40_miptree_surface_del(struct pipe_surface *ps) } void -nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +nvfx_screen_init_miptree_functions(struct pipe_screen *pscreen) { - pscreen->texture_create = nv40_miptree_create; - pscreen->texture_destroy = nv40_miptree_destroy; - pscreen->get_tex_surface = nv40_miptree_surface_new; - pscreen->tex_surface_destroy = nv40_miptree_surface_del; + pscreen->texture_create = nvfx_miptree_create; + pscreen->texture_destroy = nvfx_miptree_destroy; + pscreen->get_tex_surface = nvfx_miptree_surface_new; + pscreen->tex_surface_destroy = nvfx_miptree_surface_del; - nouveau_screen(pscreen)->texture_blanket = nv40_miptree_blanket; + nouveau_screen(pscreen)->texture_blanket = nvfx_miptree_blanket; } - diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c new file mode 100644 index 0000000000..acbaf75a23 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -0,0 +1,127 @@ +#include "pipe/p_context.h" + +#include "nvfx_context.h" + +struct nvfx_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nvfx_query * +nvfx_query(struct pipe_query *pipe) +{ + return (struct nvfx_query *)pipe; +} + +static struct pipe_query * +nvfx_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nvfx_query *q; + + q = CALLOC(1, sizeof(struct nvfx_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nvfx_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_query *q = nvfx_query(pq); + + if (q->object) + nouveau_resource_free(&q->object); + FREE(q); +} + +static void +nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_query *q = nvfx_query(pq); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nouveau_notifier_reset(nvfx->screen->query, q->object->start); + + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); + + q->ready = FALSE; +} + +static void +nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_query *q = nvfx_query(pq); + + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(chan); +} + +static boolean +nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_query *q = nvfx_query(pq); + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nouveau_notifier_status(nvfx->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + + nouveau_notifier_wait_status(nvfx->screen->query, + q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, 0); + } + + q->result = nouveau_notifier_return_val(nvfx->screen->query, + q->object->start); + q->ready = TRUE; + nouveau_resource_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nvfx_init_query_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_query = nvfx_query_create; + nvfx->pipe.destroy_query = nvfx_query_destroy; + nvfx->pipe.begin_query = nvfx_query_begin; + nvfx->pipe.end_query = nvfx_query_end; + nvfx->pipe.get_query_result = nvfx_query_result; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 85433d2095..8138715cc7 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -3,18 +3,18 @@ #include "nouveau/nouveau_screen.h" -#include "nv30_context.h" -#include "nv30_screen.h" +#include "nvfx_context.h" +#include "nvfx_screen.h" #define NV30TCL_CHIPSET_3X_MASK 0x00000003 #define NV34TCL_CHIPSET_3X_MASK 0x00000010 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0 /* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h - * to get the pointer to the context front buffer, so I copied nouveau_winsys here. - * nv30_screen_surface_format_supported() can then use it to enforce creating fbo - * with same number of bits everywhere. - */ +* to get the pointer to the context front buffer, so I copied nouveau_winsys here. +* nv30_screen_surface_format_supported() can then use it to enforce creating fbo +* with same number of bits everywhere. +*/ struct nouveau_winsys { struct pipe_winsys base; @@ -22,15 +22,21 @@ struct nouveau_winsys { struct pipe_surface *front; }; +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 static int -nv30_screen_get_param(struct pipe_screen *pscreen, int param) +nvfx_screen_get_param(struct pipe_screen *pscreen, int param) { + struct nvfx_screen *screen = nvfx_screen(pscreen); + switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; + /* TODO: check this */ + return screen->is_nv4x ? 16 : 8; case PIPE_CAP_NPOT_TEXTURES: - return 0; + return !!screen->is_nv4x; case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: @@ -40,7 +46,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: - return 2; + return screen->is_nv4x ? 4 : 2; case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: @@ -52,21 +58,26 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; + return !!screen->is_nv4x; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; + return 0; /* We have 4 on nv40 - but unsupported currently */ case PIPE_CAP_TGSI_CONT_SUPPORTED: return 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; + return !!screen->is_nv4x; case NOUVEAU_CAP_HW_VTXBUF: + /* TODO: this is almost surely wrong */ + return !!screen->is_nv4x; case NOUVEAU_CAP_HW_IDXBUF: - return 1; + /* TODO: this is also almost surely wrong */ + return screen->is_nv4x && screen->eng3d->grclass == NV40TCL; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; case PIPE_CAP_INDEP_BLEND_ENABLE: + /* TODO: on nv40 we have separate color masks */ + /* TODO: nv40 mrt blending is probably broken */ return 0; case PIPE_CAP_INDEP_BLEND_FUNC: return 0; @@ -83,8 +94,10 @@ nv30_screen_get_param(struct pipe_screen *pscreen, int param) } static float -nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) +nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param) { + struct nvfx_screen *screen = nvfx_screen(pscreen); + switch (param) { case PIPE_CAP_MAX_LINE_WIDTH: case PIPE_CAP_MAX_LINE_WIDTH_AA: @@ -93,9 +106,9 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) case PIPE_CAP_MAX_POINT_WIDTH_AA: return 64.0; case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 8.0; + return screen->is_nv4x ? 16.0 : 8.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; + return screen->is_nv4x ? 16.0 : 4.0; default: NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); return 0.0; @@ -103,11 +116,12 @@ nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) } static boolean -nv30_screen_surface_format_supported(struct pipe_screen *pscreen, +nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, enum pipe_format format, enum pipe_texture_target target, unsigned tex_usage, unsigned geom_flags) { + struct nvfx_screen *screen = nvfx_screen(pscreen); struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { @@ -125,9 +139,9 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_X8Z24_UNORM: return TRUE; case PIPE_FORMAT_Z16_UNORM: - if (front) { + /* TODO: this nv30 limitation probably does not exist */ + if (!screen->is_nv4x && front) return (front->format == PIPE_FORMAT_B5G6R5_UNORM); - } return TRUE; default: break; @@ -144,7 +158,14 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_L8A8_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: return TRUE; + /* TODO: does nv30 support this? */ + case PIPE_FORMAT_R16_SNORM: + return !!screen->is_nv4x; default: break; } @@ -154,20 +175,20 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen, } static struct pipe_buffer * -nv30_surface_buffer(struct pipe_surface *surf) +nvfx_surface_buffer(struct pipe_surface *surf) { - struct nv30_miptree *mt = (struct nv30_miptree *)surf->texture; + struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; return mt->buffer; } static void -nv30_screen_destroy(struct pipe_screen *pscreen) +nvfx_screen_destroy(struct pipe_screen *pscreen) { - struct nv30_screen *screen = nv30_screen(pscreen); + struct nvfx_screen *screen = nvfx_screen(pscreen); unsigned i; - for (i = 0; i < NV30_STATE_MAX; i++) { + for (i = 0; i < NVFX_STATE_MAX; i++) { if (screen->state[i]) so_ref(NULL, &screen->state[i]); } @@ -177,7 +198,7 @@ nv30_screen_destroy(struct pipe_screen *pscreen) nouveau_resource_destroy(&screen->query_heap); nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->rankine); + nouveau_grobj_free(&screen->eng3d); nv04_surface_2d_takedown(&screen->eng2d); nouveau_screen_fini(&screen->base); @@ -185,60 +206,155 @@ nv30_screen_destroy(struct pipe_screen *pscreen) FREE(pscreen); } +static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +{ + int i; + + /* TODO: perhaps we should do some of this on nv40 too? */ + for (i=1; i<8; i++) { + so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + so_data (so, 0); + so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + so_data (so, 0); + } + + so_method(so, screen->eng3d, 0x220, 1); + so_data (so, 1); + + so_method(so, screen->eng3d, 0x03b0, 1); + so_data (so, 0x00100000); + so_method(so, screen->eng3d, 0x1454, 1); + so_data (so, 0); + so_method(so, screen->eng3d, 0x1d80, 1); + so_data (so, 3); + so_method(so, screen->eng3d, 0x1450, 1); + so_data (so, 0x00030004); + + /* NEW */ + so_method(so, screen->eng3d, 0x1e98, 1); + so_data (so, 0); + so_method(so, screen->eng3d, 0x17e0, 3); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_method(so, screen->eng3d, 0x1f80, 16); + for (i=0; i<16; i++) { + so_data (so, (i==8) ? 0x0000ffff : 0); + } + + so_method(so, screen->eng3d, 0x120, 3); + so_data (so, 0); + so_data (so, 1); + so_data (so, 2); + + so_method(so, screen->eng3d, 0x1d88, 1); + so_data (so, 0x00001200); + + so_method(so, screen->eng3d, NV34TCL_RC_ENABLE, 1); + so_data (so, 0); + + so_method(so, screen->eng3d, NV34TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->eng3d, NV34TCL_MULTISAMPLE_CONTROL, 1); + so_data (so, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ + so_method(so, screen->eng3d, 0x1e94, 1); + so_data (so, 0x13); +} + +static void nv40_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +{ + so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2); + so_data (so, screen->base.channel->vram->handle); + so_data (so, screen->base.channel->vram->handle); + + so_method(so, screen->eng3d, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->eng3d, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->eng3d, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->eng3d, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->eng3d, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->eng3d, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->eng3d, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->eng3d, 0x1e94, 1); + so_data (so, 0x00000001); +} + struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { - struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); + struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); struct nouveau_channel *chan; struct pipe_screen *pscreen; struct nouveau_stateobj *so; - unsigned rankine_class = 0; - int ret, i; + unsigned eng3d_class = 0; + int ret; if (!screen) return NULL; + pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); if (ret) { - nv30_screen_destroy(pscreen); + nvfx_screen_destroy(pscreen); return NULL; } chan = screen->base.channel; pscreen->winsys = ws; - pscreen->destroy = nv30_screen_destroy; - pscreen->get_param = nv30_screen_get_param; - pscreen->get_paramf = nv30_screen_get_paramf; - pscreen->is_format_supported = nv30_screen_surface_format_supported; - pscreen->context_create = nv30_create; - - nv30_screen_init_miptree_functions(pscreen); - nv30_screen_init_transfer_functions(pscreen); + pscreen->destroy = nvfx_screen_destroy; + pscreen->get_param = nvfx_screen_get_param; + pscreen->get_paramf = nvfx_screen_get_paramf; + pscreen->is_format_supported = nvfx_screen_surface_format_supported; + pscreen->context_create = nvfx_create; - /* 3D object */ switch (dev->chipset & 0xf0) { case 0x30: if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - rankine_class = 0x0397; - else - if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - rankine_class = 0x0697; - else - if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - rankine_class = 0x0497; + eng3d_class = 0x0397; + else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0697; + else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0497; break; - default: + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV40TCL; + else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV44TCL; + screen->is_nv4x = ~0; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV44TCL; + screen->is_nv4x = ~0; break; } - if (!rankine_class) { - NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", dev->chipset); + if (!eng3d_class) { + NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset); return NULL; } - ret = nouveau_grobj_alloc(chan, 0xbeef3097, rankine_class, - &screen->rankine); + nvfx_screen_init_miptree_functions(pscreen); + + ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); if (ret) { NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; @@ -246,13 +362,13 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv30_surface_buffer; + screen->eng2d->buf = nvfx_surface_buffer; /* Notifier for sync purposes */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); if (ret) { NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv30_screen_destroy(pscreen); + nvfx_screen_destroy(pscreen); return NULL; } @@ -260,99 +376,54 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); if (ret) { NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv30_screen_destroy(pscreen); + nvfx_screen_destroy(pscreen); return NULL; } ret = nouveau_resource_init(&screen->query_heap, 0, 32); if (ret) { NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv30_screen_destroy(pscreen); + nvfx_screen_destroy(pscreen); return NULL; } /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, 256) || + if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) || nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { - nv30_screen_destroy(pscreen); + nvfx_screen_destroy(pscreen); return NULL; } - /* Static rankine initialisation */ - so = so_new(36, 60, 0); - so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); + /* Static eng3d initialisation */ + /* make the so big and don't worry about exact values + since we it will be thrown away immediately after use */ + so = so_new(256, 256, 0); + so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); + so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2); so_data (so, chan->vram->handle); so_data (so, chan->gart->handle); - so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1); so_data (so, chan->vram->handle); - so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); + so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2); so_data (so, chan->vram->handle); so_data (so, chan->vram->handle); - so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); + so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2); so_data (so, chan->vram->handle); so_data (so, chan->gart->handle); -/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle);*/ - so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); - so_data (so, chan->vram->handle); - - for (i=1; i<8; i++) { - so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); - so_data (so, 0); - so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); - so_data (so, 0); - } - - so_method(so, screen->rankine, 0x220, 1); - so_data (so, 1); - - so_method(so, screen->rankine, 0x03b0, 1); - so_data (so, 0x00100000); - so_method(so, screen->rankine, 0x1454, 1); - so_data (so, 0); - so_method(so, screen->rankine, 0x1d80, 1); - so_data (so, 3); - so_method(so, screen->rankine, 0x1450, 1); - so_data (so, 0x00030004); - /* NEW */ - so_method(so, screen->rankine, 0x1e98, 1); - so_data (so, 0); - so_method(so, screen->rankine, 0x17e0, 3); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_method(so, screen->rankine, 0x1f80, 16); - for (i=0; i<16; i++) { - so_data (so, (i==8) ? 0x0000ffff : 0); - } - - so_method(so, screen->rankine, 0x120, 3); + so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2); so_data (so, 0); - so_data (so, 1); - so_data (so, 2); + so_data (so, screen->query->handle); - so_method(so, screen->rankine, 0x1d88, 1); - so_data (so, 0x00001200); - - so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); - so_data (so, 0); - - so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - - so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); - so_data (so, 0xffff0000); + so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->vram->handle); - /* enables use of vp rather than fixed-function somehow */ - so_method(so, screen->rankine, 0x1e94, 1); - so_data (so, 0x13); + if(!screen->is_nv4x) + nv30_screen_init(screen, so); + else + nv40_screen_init(screen, so); so_emit(chan, so); so_ref(NULL, &so); diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index 9437aa050d..c0b4b9899d 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -1,19 +1,21 @@ -#ifndef __NV40_SCREEN_H__ -#define __NV40_SCREEN_H__ +#ifndef __NVFX_SCREEN_H__ +#define __NVFX_SCREEN_H__ #include "nouveau/nouveau_screen.h" -#include "nouveau/nv04_surface_2d.h" +#include "nv04_surface_2d.h" -struct nv40_screen { +struct nvfx_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; - struct nv40_context *cur_ctx; + struct nvfx_context *cur_ctx; + + unsigned is_nv4x; /* either 0 or ~0 */ /* HW graphics objects */ struct nv04_surface_2d *eng2d; - struct nouveau_grobj *curie; + struct nouveau_grobj *eng3d; struct nouveau_notifier *sync; /* Query object resources */ @@ -25,16 +27,13 @@ struct nv40_screen { struct nouveau_resource *vp_data_heap; /* Current 3D state of channel */ - struct nouveau_stateobj *state[NV40_STATE_MAX]; + struct nouveau_stateobj *state[NVFX_STATE_MAX]; }; -static INLINE struct nv40_screen * -nv40_screen(struct pipe_screen *screen) +static INLINE struct nvfx_screen * +nvfx_screen(struct pipe_screen *screen) { - return (struct nv40_screen *)screen; + return (struct nvfx_screen *)screen; } -void -nv40_screen_init_transfer_functions(struct pipe_screen *pscreen); - #endif diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h new file mode 100644 index 0000000000..0b2f044f7f --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -0,0 +1,429 @@ +#ifndef __NVFX_SHADER_H__ +#define __NVFX_SHADER_H__ + +/* this will resolve to either the NV30 or the NV40 version + * depending on the current hardware */ +/* unusual, but very fast and compact method */ +#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c)))) + +#define NVFX_VP_INST_SLOT_VEC 0 +#define NVFX_VP_INST_SLOT_SCA 1 + +#define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +#define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +#define NVFX_VP_INST_IN_NORMAL 2 +#define NVFX_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +#define NVFX_VP_INST_IN_COL1 4 +#define NVFX_VP_INST_IN_FOGC 5 +#define NVFX_VP_INST_IN_TC0 8 +#define NVFX_VP_INST_IN_TC(n) (8+n) + +#define NVFX_VP_INST_SCA_OP_NOP 0x00 +#define NVFX_VP_INST_SCA_OP_MOV 0x01 +#define NVFX_VP_INST_SCA_OP_RCP 0x02 +#define NVFX_VP_INST_SCA_OP_RCC 0x03 +#define NVFX_VP_INST_SCA_OP_RSQ 0x04 +#define NVFX_VP_INST_SCA_OP_EXP 0x05 +#define NVFX_VP_INST_SCA_OP_LOG 0x06 +#define NVFX_VP_INST_SCA_OP_LIT 0x07 +#define NVFX_VP_INST_SCA_OP_BRA 0x09 +#define NVFX_VP_INST_SCA_OP_CAL 0x0B +#define NVFX_VP_INST_SCA_OP_RET 0x0C +#define NVFX_VP_INST_SCA_OP_LG2 0x0D +#define NVFX_VP_INST_SCA_OP_EX2 0x0E +#define NVFX_VP_INST_SCA_OP_SIN 0x0F +#define NVFX_VP_INST_SCA_OP_COS 0x10 + +#define NV40_VP_INST_SCA_OP_PUSHA 0x13 +#define NV40_VP_INST_SCA_OP_POPA 0x14 + +#define NVFX_VP_INST_VEC_OP_NOP 0x00 +#define NVFX_VP_INST_VEC_OP_MOV 0x01 +#define NVFX_VP_INST_VEC_OP_MUL 0x02 +#define NVFX_VP_INST_VEC_OP_ADD 0x03 +#define NVFX_VP_INST_VEC_OP_MAD 0x04 +#define NVFX_VP_INST_VEC_OP_DP3 0x05 +#define NVFX_VP_INST_VEC_OP_DPH 0x06 +#define NVFX_VP_INST_VEC_OP_DP4 0x07 +#define NVFX_VP_INST_VEC_OP_DST 0x08 +#define NVFX_VP_INST_VEC_OP_MIN 0x09 +#define NVFX_VP_INST_VEC_OP_MAX 0x0A +#define NVFX_VP_INST_VEC_OP_SLT 0x0B +#define NVFX_VP_INST_VEC_OP_SGE 0x0C +#define NVFX_VP_INST_VEC_OP_ARL 0x0D +#define NVFX_VP_INST_VEC_OP_FRC 0x0E +#define NVFX_VP_INST_VEC_OP_FLR 0x0F +#define NVFX_VP_INST_VEC_OP_SEQ 0x10 +#define NVFX_VP_INST_VEC_OP_SFL 0x11 +#define NVFX_VP_INST_VEC_OP_SGT 0x12 +#define NVFX_VP_INST_VEC_OP_SLE 0x13 +#define NVFX_VP_INST_VEC_OP_SNE 0x14 +#define NVFX_VP_INST_VEC_OP_STR 0x15 +#define NVFX_VP_INST_VEC_OP_SSG 0x16 +#define NVFX_VP_INST_VEC_OP_ARR 0x17 +#define NVFX_VP_INST_VEC_OP_ARA 0x18 + +#define NV40_VP_INST_VEC_OP_TXL 0x19 + +/* DWORD 3 */ +#define NVFX_VP_INST_LAST (1 << 0) + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * + * NV40 Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + */ + +//== Opcode / Destination selection == +#define NVFX_FP_OP_PROGRAM_END (1 << 0) +#define NVFX_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NVFX_FP_OP_OUT_REG_HALF (1 << 7) +#define NVFX_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NVFX_FP_OP_OUTMASK_SHIFT 9 +#define NVFX_FP_OP_OUTMASK_MASK (0xF << 9) +# define NVFX_FP_OP_OUT_X (1<<9) +# define NVFX_FP_OP_OUT_Y (1<<10) +# define NVFX_FP_OP_OUT_Z (1<<11) +# define NVFX_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NVFX_FP_OP_INPUT_SRC_SHIFT 13 +#define NVFX_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NVFX_FP_OP_INPUT_SRC_POSITION 0x0 +# define NVFX_FP_OP_INPUT_SRC_COL0 0x1 +# define NVFX_FP_OP_INPUT_SRC_COL1 0x2 +# define NVFX_FP_OP_INPUT_SRC_FOGC 0x3 +# define NVFX_FP_OP_INPUT_SRC_TC0 0x4 +# define NVFX_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NVFX_FP_OP_TEX_UNIT_SHIFT 17 +#define NVFX_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NVFX_FP_OP_PRECISION_SHIFT 22 +#define NVFX_FP_OP_PRECISION_MASK (3 << 22) +# define NVFX_FP_PRECISION_FP32 0 +# define NVFX_FP_PRECISION_FP16 1 +# define NVFX_FP_PRECISION_FX12 2 +#define NVFX_FP_OP_OPCODE_SHIFT 24 +#define NVFX_FP_OP_OPCODE_MASK (0x3F << 24) +/* NV30/NV40 fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_NOP 0x00 +#define NVFX_FP_OP_OPCODE_MOV 0x01 +#define NVFX_FP_OP_OPCODE_MUL 0x02 +#define NVFX_FP_OP_OPCODE_ADD 0x03 +#define NVFX_FP_OP_OPCODE_MAD 0x04 +#define NVFX_FP_OP_OPCODE_DP3 0x05 +#define NVFX_FP_OP_OPCODE_DP4 0x06 +#define NVFX_FP_OP_OPCODE_DST 0x07 +#define NVFX_FP_OP_OPCODE_MIN 0x08 +#define NVFX_FP_OP_OPCODE_MAX 0x09 +#define NVFX_FP_OP_OPCODE_SLT 0x0A +#define NVFX_FP_OP_OPCODE_SGE 0x0B +#define NVFX_FP_OP_OPCODE_SLE 0x0C +#define NVFX_FP_OP_OPCODE_SGT 0x0D +#define NVFX_FP_OP_OPCODE_SNE 0x0E +#define NVFX_FP_OP_OPCODE_SEQ 0x0F +#define NVFX_FP_OP_OPCODE_FRC 0x10 +#define NVFX_FP_OP_OPCODE_FLR 0x11 +#define NVFX_FP_OP_OPCODE_KIL 0x12 +#define NVFX_FP_OP_OPCODE_PK4B 0x13 +#define NVFX_FP_OP_OPCODE_UP4B 0x14 +#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_TEX 0x17 +#define NVFX_FP_OP_OPCODE_TXP 0x18 +#define NVFX_FP_OP_OPCODE_TXD 0x19 +#define NVFX_FP_OP_OPCODE_RCP 0x1A +#define NVFX_FP_OP_OPCODE_EX2 0x1C +#define NVFX_FP_OP_OPCODE_LG2 0x1D +#define NVFX_FP_OP_OPCODE_STR 0x20 +#define NVFX_FP_OP_OPCODE_SFL 0x21 +#define NVFX_FP_OP_OPCODE_COS 0x22 +#define NVFX_FP_OP_OPCODE_SIN 0x23 +#define NVFX_FP_OP_OPCODE_PK2H 0x24 +#define NVFX_FP_OP_OPCODE_UP2H 0x25 +#define NVFX_FP_OP_OPCODE_PK4UB 0x27 +#define NVFX_FP_OP_OPCODE_UP4UB 0x28 +#define NVFX_FP_OP_OPCODE_PK2US 0x29 +#define NVFX_FP_OP_OPCODE_UP2US 0x2A +#define NVFX_FP_OP_OPCODE_DP2A 0x2E +#define NVFX_FP_OP_OPCODE_TXB 0x31 +#define NVFX_FP_OP_OPCODE_DIV 0x3A + +/* NV30 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B +#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E +#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F +#define NVFX_FP_OP_OPCODE_POW_NV30 0x26 +#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36 + +/* NV40 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31 +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +#define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +#define NV40_FP_OP_BRA_OPCODE_IF 0x2 +#define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +#define NV40_FP_OP_BRA_OPCODE_REP 0x4 +#define NV40_FP_OP_BRA_OPCODE_RET 0x5 + +#define NVFX_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NVFX_FP_OP_OUT_ABS (1 << 29) +#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27 +#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NVFX_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NVFX_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NVFX_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NVFX_FP_OP_COND_SWZ_X_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NVFX_FP_OP_COND_SHIFT 18 +#define NVFX_FP_OP_COND_MASK (0x07 << 18) +# define NVFX_FP_OP_COND_FL 0 +# define NVFX_FP_OP_COND_LT 1 +# define NVFX_FP_OP_COND_EQ 2 +# define NVFX_FP_OP_COND_LE 3 +# define NVFX_FP_OP_COND_GT 4 +# define NVFX_FP_OP_COND_NE 5 +# define NVFX_FP_OP_COND_GE 6 +# define NVFX_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NVFX_FP_OP_DST_SCALE_SHIFT 28 +#define NVFX_FP_OP_DST_SCALE_MASK (3 << 28) +#define NVFX_FP_OP_DST_SCALE_1X 0 +#define NVFX_FP_OP_DST_SCALE_2X 1 +#define NVFX_FP_OP_DST_SCALE_4X 2 +#define NVFX_FP_OP_DST_SCALE_8X 3 +#define NVFX_FP_OP_DST_SCALE_INV_2X 5 +#define NVFX_FP_OP_DST_SCALE_INV_4X 6 +#define NVFX_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +/* high order bits of SRC2 */ +#define NVFX_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NVFX_FP_REG_TYPE_SHIFT 0 +#define NVFX_FP_REG_TYPE_MASK (3 << 0) +# define NVFX_FP_REG_TYPE_TEMP 0 +# define NVFX_FP_REG_TYPE_INPUT 1 +# define NVFX_FP_REG_TYPE_CONST 2 +#define NVFX_FP_REG_SRC_SHIFT 2 +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NVFX_FP_REG_SRC_HALF (1 << 8) +#define NVFX_FP_REG_SWZ_ALL_SHIFT 9 +#define NVFX_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NVFX_FP_REG_SWZ_X_SHIFT 9 +#define NVFX_FP_REG_SWZ_X_MASK (3 << 9) +#define NVFX_FP_REG_SWZ_Y_SHIFT 11 +#define NVFX_FP_REG_SWZ_Y_MASK (3 << 11) +#define NVFX_FP_REG_SWZ_Z_SHIFT 13 +#define NVFX_FP_REG_SWZ_Z_MASK (3 << 13) +#define NVFX_FP_REG_SWZ_W_SHIFT 15 +#define NVFX_FP_REG_SWZ_W_MASK (3 << 15) +# define NVFX_FP_SWIZZLE_X 0 +# define NVFX_FP_SWIZZLE_Y 1 +# define NVFX_FP_SWIZZLE_Z 2 +# define NVFX_FP_SWIZZLE_W 3 +#define NVFX_FP_REG_NEGATE (1 << 17) + +#define NVFXSR_NONE 0 +#define NVFXSR_OUTPUT 1 +#define NVFXSR_INPUT 2 +#define NVFXSR_TEMP 3 +#define NVFXSR_CONST 4 + +#define NVFX_COND_FL 0 +#define NVFX_COND_LT 1 +#define NVFX_COND_EQ 2 +#define NVFX_COND_LE 3 +#define NVFX_COND_GT 4 +#define NVFX_COND_NE 5 +#define NVFX_COND_GE 6 +#define NVFX_COND_TR 7 + +/* Yes, this are ordered differently... */ + +#define NVFX_VP_MASK_X 8 +#define NVFX_VP_MASK_Y 4 +#define NVFX_VP_MASK_Z 2 +#define NVFX_VP_MASK_W 1 +#define NVFX_VP_MASK_ALL 0xf + +#define NVFX_FP_MASK_X 1 +#define NVFX_FP_MASK_Y 2 +#define NVFX_FP_MASK_Z 4 +#define NVFX_FP_MASK_W 8 +#define NVFX_FP_MASK_ALL 0xf + +#define NVFX_SWZ_X 0 +#define NVFX_SWZ_Y 1 +#define NVFX_SWZ_Z 2 +#define NVFX_SWZ_W 3 + +#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) + +struct nvfx_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nvfx_sreg +nvfx_sr(int type, int index) +{ + struct nvfx_sreg temp = { + .type = type, + .index = index, + .dst_scale = 0, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = NVFX_COND_TR, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nvfx_sreg +nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) +{ + struct nvfx_sreg dst = src; + + dst.swz[NVFX_SWZ_X] = src.swz[x]; + dst.swz[NVFX_SWZ_Y] = src.swz[y]; + dst.swz[NVFX_SWZ_Z] = src.swz[z]; + dst.swz[NVFX_SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nvfx_sreg +nvfx_sr_neg(struct nvfx_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_abs(struct nvfx_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_scale(struct nvfx_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c new file mode 100644 index 0000000000..88a9d01c50 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -0,0 +1,619 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" +#include "nvfx_tex.h" + +static void * +nvfx_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(5, 8, 0); + + if (cso->rt[0].blend_enable) { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); + if(nvfx->screen->base.device->chipset < 0x40) { + so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + } else { + so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | + nvgl_blend_eqn(cso->rt[0].rgb_func)); + } + } else { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + /* TODO: add NV40 MRT color mask */ + + if (cso->logicop_enable) { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + so_ref(NULL, &so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nvfx_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend = hwcso; + nvfx->dirty |= NVFX_NEW_BLEND; +} + +static void +nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static void * +nvfx_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_sampler_state *ps; + + ps = MALLOC(sizeof(struct nvfx_sampler_state)); + + /* on nv30, we use this as an internal flag */ + ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT; + ps->en = 0; + ps->filt = nvfx_tex_filter(cso); + ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) | + nvfx_tex_wrap_compare_mode(cso); + ps->bcol = nvfx_tex_border_color(cso->border_color); + + if(nvfx->is_nv4x) + nv40_sampler_state_init(pipe, ps, cso); + else + nv30_sampler_state_init(pipe, ps, cso); + + return (void *)ps; +} + +static void +nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nvfx->tex_sampler[unit] = sampler[unit]; + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_samplers; unit++) { + nvfx->tex_sampler[unit] = NULL; + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_samplers = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void +nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], miptree[unit]); + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], NULL); + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_textures = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void * +nvfx_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(9, 19, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); + + so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); + if (cso->point_quad_rasterization) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if ((cso->sprite_coord_enable >> i) & 1) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + so_ref(NULL, &so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->rasterizer = hwcso; + nvfx->dirty |= NVFX_NEW_RAST; + nvfx->draw_dirty |= NVFX_NEW_RAST; +} + +static void +nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(6, 20, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + so_ref(NULL, &so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nvfx_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->zsa = hwcso; + nvfx->dirty |= NVFX_NEW_ZSA; +} + +static void +nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nvfx_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vertprog = hwcso; + nvfx->dirty |= NVFX_NEW_VERTPROG; + nvfx->draw_dirty |= NVFX_NEW_VERTPROG; +} + +static void +nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nvfx->draw, vp->draw); + nvfx_vertprog_destroy(nvfx, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nvfx_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->fragprog = hwcso; + nvfx->dirty |= NVFX_NEW_FRAGPROG; +} + +static void +nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_fragment_program *fp = hwcso; + + nvfx_fragprog_destroy(nvfx, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nvfx_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend_colour = *bcol; + nvfx->dirty |= NVFX_NEW_BCOL; +} + +static void +nvfx_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->stencil_ref = *sr; + nvfx->dirty |= NVFX_NEW_SR; +} + +static void +nvfx_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->clip = *clip; + nvfx->dirty |= NVFX_NEW_UCP; + nvfx->draw_dirty |= NVFX_NEW_UCP; +} + +static void +nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_buffer *buf ) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->constbuf[shader] = buf; + nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nvfx->dirty |= NVFX_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nvfx->dirty |= NVFX_NEW_FRAGPROG; + } +} + +static void +nvfx_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->framebuffer = *fb; + nvfx->dirty |= NVFX_NEW_FB; +} + +static void +nvfx_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->stipple, stipple->stipple, 4 * 32); + nvfx->dirty |= NVFX_NEW_STIPPLE; +} + +static void +nvfx_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->scissor = *s; + nvfx->dirty |= NVFX_NEW_SCISSOR; +} + +static void +nvfx_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->viewport = *vpt; + nvfx->dirty |= NVFX_NEW_VIEWPORT; + nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; +} + +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); + nvfx->vtxbuf_nr = count; + + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + + assert(num_elements < 16); /* not doing fallbacks yet */ + cso->num_elements = num_elements; + memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); + +/* nvfx_vtxelt_construct(cso);*/ + + return (void *)cso; +} + +static void +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vtxelt = hwcso; + nvfx->dirty |= NVFX_NEW_ARRAYS; + /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ +} + +void +nvfx_init_state_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_blend_state = nvfx_blend_state_create; + nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; + nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; + + nvfx->pipe.create_sampler_state = nvfx_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete; + nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture; + + nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; + nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind; + nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete; + + nvfx->pipe.create_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_create; + nvfx->pipe.bind_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_bind; + nvfx->pipe.delete_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_delete; + + nvfx->pipe.create_vs_state = nvfx_vp_state_create; + nvfx->pipe.bind_vs_state = nvfx_vp_state_bind; + nvfx->pipe.delete_vs_state = nvfx_vp_state_delete; + + nvfx->pipe.create_fs_state = nvfx_fp_state_create; + nvfx->pipe.bind_fs_state = nvfx_fp_state_bind; + nvfx->pipe.delete_fs_state = nvfx_fp_state_delete; + + nvfx->pipe.set_blend_color = nvfx_set_blend_color; + nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref; + nvfx->pipe.set_clip_state = nvfx_set_clip_state; + nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer; + nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state; + nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple; + nvfx->pipe.set_scissor_state = nvfx_set_scissor_state; + nvfx->pipe.set_viewport_state = nvfx_set_viewport_state; + + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; +} diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index e2e69420ea..e585246879 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -1,29 +1,21 @@ -#ifndef __NV40_STATE_H__ -#define __NV40_STATE_H__ +#ifndef __NVFX_STATE_H__ +#define __NVFX_STATE_H__ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -struct nv40_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv40_vertex_program_exec { +struct nvfx_vertex_program_exec { uint32_t data[4]; boolean has_branch_offset; int const_index; }; -struct nv40_vertex_program_data { +struct nvfx_vertex_program_data { int index; /* immediates == -1 */ float value[4]; }; -struct nv40_vertex_program { +struct nvfx_vertex_program { struct pipe_shader_state pipe; struct draw_vertex_shader *draw; @@ -32,9 +24,9 @@ struct nv40_vertex_program { struct pipe_clip_state ucp; - struct nv40_vertex_program_exec *insns; + struct nvfx_vertex_program_exec *insns; unsigned nr_insns; - struct nv40_vertex_program_data *consts; + struct nvfx_vertex_program_data *consts; unsigned nr_consts; struct nouveau_resource *exec; @@ -49,12 +41,12 @@ struct nv40_vertex_program { struct nouveau_stateobj *so; }; -struct nv40_fragment_program_data { +struct nvfx_fragment_program_data { unsigned offset; unsigned index; }; -struct nv40_fragment_program { +struct nvfx_fragment_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -64,7 +56,7 @@ struct nv40_fragment_program { uint32_t *insn; int insn_len; - struct nv40_fragment_program_data *consts; + struct nvfx_fragment_program_data *consts; unsigned nr_consts; struct pipe_buffer *buffer; @@ -73,9 +65,9 @@ struct nv40_fragment_program { struct nouveau_stateobj *so; }; -#define NV40_MAX_TEXTURE_LEVELS 16 +#define NVFX_MAX_TEXTURE_LEVELS 16 -struct nv40_miptree { +struct nvfx_miptree { struct pipe_texture base; struct nouveau_bo *bo; @@ -85,7 +77,7 @@ struct nv40_miptree { struct { uint pitch; uint *image_offset; - } level[NV40_MAX_TEXTURE_LEVELS]; + } level[NVFX_MAX_TEXTURE_LEVELS]; }; #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_blend.c b/src/gallium/drivers/nvfx/nvfx_state_blend.c new file mode 100644 index 0000000000..03b6ef8117 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_blend.c @@ -0,0 +1,41 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_blend_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->blend->so, &nvfx->state.hw[NVFX_STATE_BLEND]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_blend = { + .validate = nvfx_state_blend_validate, + .dirty = { + .pipe = NVFX_NEW_BLEND, + .hw = NVFX_STATE_BLEND + } +}; + +static boolean +nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *so = so_new(1, 1, 0); + struct pipe_blend_color *bcol = &nvfx->blend_colour; + + so_method(so, nvfx->screen->eng3d, NV34TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_BCOL]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_blend_colour = { + .validate = nvfx_state_blend_colour_validate, + .dirty = { + .pipe = NVFX_NEW_BCOL, + .hw = NVFX_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c new file mode 100644 index 0000000000..72537388ea --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -0,0 +1,179 @@ +#include "nvfx_context.h" +#include "nvfx_state.h" +#include "draw/draw_context.h" + +#define RENDER_STATES(name, vbo) \ +static struct nvfx_state_entry *name##render_states[] = { \ + &nvfx_state_framebuffer, \ + &nvfx_state_rasterizer, \ + &nvfx_state_scissor, \ + &nvfx_state_stipple, \ + &nvfx_state_fragprog, \ + &nvfx_state_fragtex, \ + &nvfx_state_vertprog, \ + &nvfx_state_blend, \ + &nvfx_state_blend_colour, \ + &nvfx_state_zsa, \ + &nvfx_state_sr, \ + &nvfx_state_viewport, \ + &nvfx_state_##vbo, \ + NULL \ +} + +RENDER_STATES(, vbo); +RENDER_STATES(swtnl_, vtxfmt); + +static void +nvfx_state_do_validate(struct nvfx_context *nvfx, + struct nvfx_state_entry **states) +{ + while (*states) { + struct nvfx_state_entry *e = *states; + + if (nvfx->dirty & e->dirty.pipe) { + if (e->validate(nvfx)) + nvfx->state.dirty |= (1ULL << e->dirty.hw); + } + + states++; + } + nvfx->dirty = 0; +} + +void +nvfx_state_emit(struct nvfx_context *nvfx) +{ + struct nvfx_state *state = &nvfx->state; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + uint64_t states; + + /* XXX: race conditions + */ + if (nvfx != screen->cur_ctx) { + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_ctx = nvfx; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nvfx->screen->state[i]); + if (state->hw[i]) + so_emit(chan, nvfx->screen->state[i]); + states &= ~(1ULL << i); + } + + /* TODO: could nv30 need this or something similar too? */ + if(nvfx->is_nv4x) { + if (state->dirty & ((1ULL << NVFX_STATE_FRAGPROG) | + (1ULL << NVFX_STATE_FRAGTEX0))) { + BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); + } + } + state->dirty = 0; +} + +void +nvfx_state_flush_notify(struct nouveau_channel *chan) +{ + struct nvfx_context *nvfx = chan->user_private; + struct nvfx_state *state = &nvfx->state; + unsigned i, samplers; + + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(chan, + state->hw[NVFX_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]); + if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW) + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]); +} + +boolean +nvfx_state_validate(struct nvfx_context *nvfx) +{ + boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; + + if (nvfx->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nvfx->fallback_swtnl & nvfx->dirty) + != nvfx->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->dirty |= (NVFX_NEW_VIEWPORT | + NVFX_NEW_VERTPROG | + NVFX_NEW_ARRAYS); + nvfx->render_mode = HW; + } + + nvfx_state_do_validate(nvfx, render_states); + + if (nvfx->fallback_swtnl || nvfx->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nvfx_state_validate_swtnl(struct nvfx_context *nvfx) +{ + struct draw_context *draw = nvfx->draw; + + /* Setup for swtnl */ + if (nvfx->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->dirty |= (NVFX_NEW_VIEWPORT | + NVFX_NEW_VERTPROG | + NVFX_NEW_ARRAYS); + nvfx->render_mode = SWTNL; + } + + if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nvfx->vertprog->draw); + + if (nvfx->draw_dirty & NVFX_NEW_RAST) + draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe); + + if (nvfx->draw_dirty & NVFX_NEW_UCP) + draw_set_clip_state(draw, &nvfx->clip); + + if (nvfx->draw_dirty & NVFX_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nvfx->viewport); + + if (nvfx->draw_dirty & NVFX_NEW_ARRAYS) { + draw_set_vertex_buffers(draw, nvfx->vtxbuf_nr, nvfx->vtxbuf); + draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); + } + + nvfx_state_do_validate(nvfx, swtnl_render_states); + + if (nvfx->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast); + return FALSE; + } + + nvfx->draw_dirty = 0; + return TRUE; +} diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c new file mode 100644 index 0000000000..dd64ba4193 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -0,0 +1,234 @@ +#include "nvfx_context.h" +#include "nouveau/nouveau_util.h" + +static struct pipe_buffer * +nvfx_do_surface_buffer(struct pipe_surface *surface) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surface->texture; + return mt->buffer; +} + +#define nvfx_surface_buffer(ps) nouveau_bo(nvfx_do_surface_buffer(ps)) + +static boolean +nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) +{ + struct pipe_framebuffer_state *fb = &nvfx->framebuffer; + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nv04_surface *rt[4], *zeta = NULL; + uint32_t rt_enable = 0, rt_format = 0; + int i, colour_format = 0, zeta_format = 0; + int depth_only = 0; + struct nouveau_stateobj *so = so_new(18, 24, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + int colour_bits = 32, zeta_bits = 32; + + if(!nvfx->is_nv4x) + assert(fb->nr_cbufs <= 2); + else + assert(fb->nr_cbufs <= 4); + + for (i = 0; i < fb->nr_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = (struct nv04_surface *)fb->cbufs[i]; + } + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | + NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = (struct nv04_surface *)fb->zsbuf; + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | + NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { + /* Render to at least a colour buffer */ + if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else if (fb->zsbuf) { + depth_only = 1; + + /* Render to depth buffer only */ + if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else { + return FALSE; + } + + switch (colour_format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + colour_bits = 16; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + zeta_bits = 16; + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { + /* TODO: does this limitation really exist? + TODO: can it be worked around somehow? */ + return FALSE; + } + + if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) + || ((!nvfx->is_nv4x) && depth_only)) { + struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + uint32_t pitch = rt0->pitch; + + if(!nvfx->is_nv4x) + { + if (zeta) { + pitch |= (zeta->pitch << 16); + } else { + pitch |= (pitch << 16); + } + } + + so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); + so_reloc (so, nvfx_surface_buffer(&rt0->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); + so_data (so, pitch); + so_reloc (so, nvfx_surface_buffer(&rt[0]->base), + rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); + so_reloc (so, nvfx_surface_buffer(&rt[1]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); + so_reloc (so, nvfx_surface_buffer(&rt[1]->base), + rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_data (so, rt[1]->pitch); + } + + if(nvfx->is_nv4x) + { + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, nvfx_surface_buffer(&rt[2]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, nvfx_surface_buffer(&rt[2]->base), + rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, nvfx_surface_buffer(&rt[3]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, nvfx_surface_buffer(&rt[3]->base), + rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch); + } + } + + if (zeta_format) { + so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); + so_reloc (so, nvfx_surface_buffer(&zeta->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); + /* TODO: reverse engineer LMA */ + so_reloc (so, nvfx_surface_buffer(&zeta->base), + zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch); + } + } + + so_method(so, eng3d, NV34TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, eng3d, 0x1d88, 1); + so_data (so, (1 << 12) | h); + + if(!nvfx->is_nv4x) { + /* Wonder why this is needed, context should all be set to zero on init */ + /* TODO: we can most likely remove this, after putting it in context init */ + so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + so_data (so, 0); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_framebuffer = { + .validate = nvfx_state_framebuffer_validate, + .dirty = { + .pipe = NVFX_NEW_FB, + .hw = NVFX_STATE_FB + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c new file mode 100644 index 0000000000..0d35ecbf20 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_rasterizer_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->rasterizer->so, + &nvfx->state.hw[NVFX_STATE_RAST]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_rasterizer = { + .validate = nvfx_state_rasterizer_validate, + .dirty = { + .pipe = NVFX_NEW_RAST, + .hw = NVFX_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_scissor.c b/src/gallium/drivers/nvfx/nvfx_state_scissor.c new file mode 100644 index 0000000000..940d8cb5c0 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_scissor.c @@ -0,0 +1,36 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_scissor_validate(struct nvfx_context *nvfx) +{ + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct pipe_scissor_state *s = &nvfx->scissor; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_SCISSOR] && + (rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) + return FALSE; + nvfx->state.scissor_enabled = rast->scissor; + + so = so_new(1, 2, 0); + so_method(so, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2); + if (nvfx->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_SCISSOR]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_scissor = { + .validate = nvfx_state_scissor_validate, + .dirty = { + .pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST, + .hw = NVFX_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c new file mode 100644 index 0000000000..57cd3c936a --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_stipple.c @@ -0,0 +1,40 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_stipple_validate(struct nvfx_context *nvfx) +{ + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(2, 33, 0); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nvfx->stipple[i]); + } else { + so = so_new(1, 1, 0); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_stipple = { + .validate = nvfx_state_stipple_validate, + .dirty = { + .pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST, + .hw = NVFX_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c new file mode 100644 index 0000000000..82e0e9220b --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_viewport.c @@ -0,0 +1,51 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_viewport_validate(struct nvfx_context *nvfx) +{ + struct pipe_viewport_state *vpt = &nvfx->viewport; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_VIEWPORT] && + !(nvfx->dirty & NVFX_NEW_VIEWPORT)) + return FALSE; + + so = so_new(2, 9, 0); + so_method(so, nvfx->screen->eng3d, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + if(nvfx->render_mode == HW) { + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, 1); + } else { + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, nvfx->is_nv4x ? 0x110 : 1); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_viewport = { + .validate = nvfx_state_viewport_validate, + .dirty = { + .pipe = NVFX_NEW_VIEWPORT, + .hw = NVFX_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_zsa.c b/src/gallium/drivers/nvfx/nvfx_state_zsa.c new file mode 100644 index 0000000000..c84fd041c1 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_zsa.c @@ -0,0 +1,41 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_zsa_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->zsa->so, + &nvfx->state.hw[NVFX_STATE_ZSA]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_zsa = { + .validate = nvfx_state_zsa_validate, + .dirty = { + .pipe = NVFX_NEW_ZSA, + .hw = NVFX_STATE_ZSA + } +}; + +static boolean +nvfx_state_sr_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *so = so_new(2, 2, 0); + struct pipe_stencil_ref *sr = &nvfx->stencil_ref; + + so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); + so_data (so, sr->ref_value[0]); + so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1); + so_data (so, sr->ref_value[1]); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_SR]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_sr = { + .validate = nvfx_state_sr_validate, + .dirty = { + .pipe = NVFX_NEW_SR, + .hw = NVFX_STATE_SR + } +}; diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index bc18e577ee..8a05ad0a57 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -1,9 +1,9 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -11,11 +11,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -23,40 +23,40 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#include "nv30_context.h" +#include "nvfx_context.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_tile.h" static void -nv30_surface_copy(struct pipe_context *pipe, +nvfx_surface_copy(struct pipe_context *pipe, struct pipe_surface *dest, unsigned destx, unsigned desty, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); } static void -nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, unsigned destx, unsigned desty, unsigned width, unsigned height, unsigned value) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; eng2d->fill(eng2d, dest, destx, desty, width, height, value); } void -nv30_init_surface_functions(struct nv30_context *nv30) +nvfx_init_surface_functions(struct nvfx_context *nvfx) { - nv30->pipe.surface_copy = nv30_surface_copy; - nv30->pipe.surface_fill = nv30_surface_fill; + nvfx->pipe.surface_copy = nvfx_surface_copy; + nvfx->pipe.surface_fill = nvfx_surface_fill; } diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h new file mode 100644 index 0000000000..69187a79e7 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_tex.h @@ -0,0 +1,133 @@ +#ifndef NVFX_TEX_H_ +#define NVFX_TEX_H_ + +static inline unsigned +nvfx_tex_wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static inline unsigned +nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso) +{ + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + return NV34TCL_TX_WRAP_RCOMP_NEVER; + case PIPE_FUNC_GREATER: + return NV34TCL_TX_WRAP_RCOMP_GREATER; + case PIPE_FUNC_EQUAL: + return NV34TCL_TX_WRAP_RCOMP_EQUAL; + case PIPE_FUNC_GEQUAL: + return NV34TCL_TX_WRAP_RCOMP_GEQUAL; + case PIPE_FUNC_LESS: + return NV34TCL_TX_WRAP_RCOMP_LESS; + case PIPE_FUNC_NOTEQUAL: + return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + case PIPE_FUNC_LEQUAL: + return NV34TCL_TX_WRAP_RCOMP_LEQUAL; + case PIPE_FUNC_ALWAYS: + return NV34TCL_TX_WRAP_RCOMP_ALWAYS; + default: + break; + } + } + return 0; +} + +static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) +{ + unsigned filter = 0; + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + return filter; +} + +static inline unsigned nvfx_tex_border_color(const float* border_color) +{ + return ((float_to_ubyte(border_color[3]) << 24) | + (float_to_ubyte(border_color[0]) << 16) | + (float_to_ubyte(border_color[1]) << 8) | + (float_to_ubyte(border_color[2]) << 0)); +} + +struct nvfx_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +#endif /* NVFX_TEX_H_ */ diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index 0462a042c3..409b354d58 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -5,18 +5,18 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "nouveau/nouveau_winsys.h" -#include "nv40_context.h" -#include "nv40_screen.h" -#include "nv40_state.h" +#include "nvfx_context.h" +#include "nvfx_screen.h" +#include "nvfx_state.h" -struct nv40_transfer { +struct nvfx_transfer { struct pipe_transfer base; struct pipe_surface *surface; boolean direct; }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, +nvfx_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); @@ -33,16 +33,17 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned h } static struct pipe_transfer * -nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +nvfx_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, unsigned w, unsigned h) { - struct nv40_miptree *mt = (struct nv40_miptree *)pt; - struct nv40_transfer *tx; + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + struct nvfx_transfer *tx; struct pipe_texture tx_tex_template, *tx_tex; - tx = CALLOC_STRUCT(nv40_transfer); + tx = CALLOC_STRUCT(nvfx_transfer); if (!tx) return NULL; @@ -71,7 +72,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); + nvfx_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,7 +81,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch; tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, @@ -96,7 +97,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, } if (usage & PIPE_TRANSFER_READ) { - struct nv40_screen *nvscreen = nv40_screen(pscreen); + struct nvfx_screen *nvscreen = nvfx_screen(pscreen); struct pipe_surface *src; src = pscreen->get_tex_surface(pscreen, pt, @@ -117,13 +118,14 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, } static void -nv40_transfer_del(struct pipe_transfer *ptx) +nvfx_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx) { - struct nv40_transfer *tx = (struct nv40_transfer *)ptx; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv40_screen *nvscreen = nv40_screen(pscreen); + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_screen *nvscreen = nvfx_screen(pscreen); struct pipe_surface *dst; dst = pscreen->get_tex_surface(pscreen, ptx->texture, @@ -145,11 +147,12 @@ nv40_transfer_del(struct pipe_transfer *ptx) } static void * -nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nvfx_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx) { - struct nv40_transfer *tx = (struct nv40_transfer *)ptx; + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); @@ -160,19 +163,20 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) } static void -nv40_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nvfx_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx) { - struct nv40_transfer *tx = (struct nv40_transfer *)ptx; - struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; pipe_buffer_unmap(pscreen, mt->buffer); } void -nv40_screen_init_transfer_functions(struct pipe_screen *pscreen) +nvfx_init_transfer_functions(struct nvfx_context *nvfx) { - pscreen->get_tex_transfer = nv40_transfer_new; - pscreen->tex_transfer_destroy = nv40_transfer_del; - pscreen->transfer_map = nv40_transfer_map; - pscreen->transfer_unmap = nv40_transfer_unmap; + nvfx->pipe.get_tex_transfer = nvfx_transfer_new; + nvfx->pipe.tex_transfer_destroy = nvfx_transfer_del; + nvfx->pipe.transfer_map = nvfx_transfer_map; + nvfx->pipe.transfer_unmap = nvfx_transfer_unmap; } diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index f3856bb5a5..257087f8f6 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -3,17 +3,24 @@ #include "util/u_inlines.h" #include "util/u_format.h" -#include "nv30_context.h" -#include "nv30_state.h" +#include "nvfx_context.h" +#include "nvfx_state.h" #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" #include "nouveau/nouveau_util.h" -#define FORCE_SWTNL 0 +static boolean +nvfx_force_swtnl(struct nvfx_context *nvfx) +{ + static int force_swtnl = -1; + if(force_swtnl < 0) + force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", 0); + return force_swtnl; +} static INLINE int -nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { switch (pipe) { case PIPE_FORMAT_R32_FLOAT: @@ -69,15 +76,15 @@ nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) } static boolean -nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, +nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, unsigned ib_size) { - struct pipe_screen *pscreen = &nv30->screen->base.base; + struct pipe_screen *pscreen = &nvfx->screen->base.base; unsigned type; if (!ib) { - nv30->idxbuf = NULL; - nv30->idxbuf_format = 0xdeadbeef; + nvfx->idxbuf = NULL; + nvfx->idxbuf_format = 0xdeadbeef; return FALSE; } @@ -95,27 +102,27 @@ nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, return FALSE; } - if (ib != nv30->idxbuf || - type != nv30->idxbuf_format) { - nv30->dirty |= NV30_NEW_ARRAYS; - nv30->idxbuf = ib; - nv30->idxbuf_format = type; + if (ib != nvfx->idxbuf || + type != nvfx->idxbuf_format) { + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->idxbuf = ib; + nvfx->idxbuf_format = type; } return TRUE; } static boolean -nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, +nvfx_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so, int attrib, struct pipe_vertex_element *ve, struct pipe_vertex_buffer *vb) { - struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned type, ncomp; void *map; - if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) return FALSE; map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -128,25 +135,25 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, switch (ncomp) { case 4: - so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); + so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4); so_data (so, fui(v[0])); so_data (so, fui(v[1])); so_data (so, fui(v[2])); so_data (so, fui(v[3])); break; case 3: - so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3); + so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3); so_data (so, fui(v[0])); so_data (so, fui(v[1])); so_data (so, fui(v[2])); break; case 2: - so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2); + so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2); so_data (so, fui(v[0])); so_data (so, fui(v[1])); break; case 1: - so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1); + so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; default: @@ -165,26 +172,26 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, } void -nv30_draw_arrays(struct pipe_context *pipe, +nvfx_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; unsigned restart = 0; - nv30_vbo_set_idxbuf(nv30, NULL, 0); - if (FORCE_SWTNL || !nv30_state_validate(nv30)) { - /*return nv30_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count);*/ - return; + nvfx_vbo_set_idxbuf(nvfx, NULL, 0); + if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { unsigned vc, nr; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -193,12 +200,12 @@ nv30_draw_arrays(struct pipe_context *pipe, continue; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -209,14 +216,14 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); count -= vc; @@ -227,18 +234,18 @@ nv30_draw_arrays(struct pipe_context *pipe, } static INLINE void -nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, +nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nv30_screen *screen = nv30->screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint8_t *elts = (uint8_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -248,11 +255,11 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, } count -= vc; - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -262,7 +269,7 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, (elts[i+1] << 16) | elts[i]); @@ -270,7 +277,7 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, elts += push; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); start = restart; @@ -278,18 +285,18 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, } static INLINE void -nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, +nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nv30_screen *screen = nv30->screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint16_t *elts = (uint16_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -299,11 +306,11 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, } count -= vc; - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -313,7 +320,7 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, (elts[i+1] << 16) | elts[i]); @@ -321,7 +328,7 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, elts += push; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); start = restart; @@ -329,18 +336,18 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, } static INLINE void -nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, +nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nv30_screen *screen = nv30->screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint32_t *elts = (uint32_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, mode, start, count, &restart); @@ -350,20 +357,20 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, } count -= vc; - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push); OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); start = restart; @@ -371,11 +378,11 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, } static void -nv30_draw_elements_inline(struct pipe_context *pipe, +nvfx_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); + struct nvfx_context *nvfx = nvfx_context(pipe); struct pipe_screen *pscreen = pipe->screen; void *map; @@ -387,13 +394,13 @@ nv30_draw_elements_inline(struct pipe_context *pipe, switch (ib_size) { case 1: - nv30_draw_elements_u08(nv30, map, mode, start, count); + nvfx_draw_elements_u08(nvfx, map, mode, start, count); break; case 2: - nv30_draw_elements_u16(nv30, map, mode, start, count); + nvfx_draw_elements_u16(nvfx, map, mode, start, count); break; case 4: - nv30_draw_elements_u32(nv30, map, mode, start, count); + nvfx_draw_elements_u32(nvfx, map, mode, start, count); break; default: NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); @@ -404,19 +411,19 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } static void -nv30_draw_elements_vbo(struct pipe_context *pipe, +nvfx_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; unsigned restart = 0; while (count) { unsigned nr, vc; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -425,12 +432,12 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, continue; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -441,14 +448,14 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); count -= vc; @@ -457,24 +464,24 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, } void -nv30_draw_elements(struct pipe_context *pipe, +nvfx_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); + struct nvfx_context *nvfx = nvfx_context(pipe); boolean idxbuf; - idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize); - if (FORCE_SWTNL || !nv30_state_validate(nv30)) { - /*return nv30_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count);*/ + idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); + if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize, + mode, start, count); return; } if (idxbuf) { - nv30_draw_elements_vbo(pipe, mode, start, count); + nvfx_draw_elements_vbo(pipe, mode, start, count); } else { - nv30_draw_elements_inline(pipe, indexBuffer, indexSize, + nvfx_draw_elements_inline(pipe, indexBuffer, indexSize, mode, start, count); } @@ -482,49 +489,50 @@ nv30_draw_elements(struct pipe_context *pipe, } static boolean -nv30_vbo_validate(struct nv30_context *nv30) +nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct pipe_buffer *ib = nv30->idxbuf; - unsigned ib_format = nv30->idxbuf_format; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_buffer *ib = nvfx->idxbuf; + unsigned ib_format = nvfx->idxbuf_format; unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt->num_elements); + so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements); vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt->num_elements); + so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements); - for (hw = 0; hw < nv30->vtxelt->num_elements; hw++) { + for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) { struct pipe_vertex_element *ve; struct pipe_vertex_buffer *vb; unsigned type, ncomp; - ve = &nv30->vtxelt->pipe[hw]; - vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + ve = &nvfx->vtxelt->pipe[hw]; + vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; if (!vb->stride) { if (!sattr) sattr = so_new(16, 16 * 4, 0); - if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { + if (nvfx_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); continue; } } - if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; so_ref(NULL, &vtxbuf); so_ref(NULL, &vtxfmt); return FALSE; } - so_reloc(vtxbuf, nouveau_bo(vb->buffer), vb->buffer_offset + - ve->src_offset, vb_flags | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + so_reloc(vtxbuf, nouveau_bo(vb->buffer), + vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); } @@ -532,31 +540,31 @@ nv30_vbo_validate(struct nv30_context *nv30) if (ib) { struct nouveau_bo *bo = nouveau_bo(ib); - so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2); + so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2); so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, 0, NV34TCL_IDXBUF_FORMAT_DMA1); } - so_method(vtxbuf, rankine, 0x1710, 1); + so_method(vtxbuf, eng3d, 0x1710, 1); so_data (vtxbuf, 0); - so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]); + so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]); so_ref(NULL, &vtxbuf); - nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF); - so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF); + so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]); so_ref(NULL, &vtxfmt); - nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT); - so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT); + so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]); so_ref(NULL, &sattr); - nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR); return FALSE; } -struct nv30_state_entry nv30_state_vbo = { - .validate = nv30_vbo_validate, +struct nvfx_state_entry nvfx_state_vbo = { + .validate = nvfx_vbo_validate, .dirty = { - .pipe = NV30_NEW_ARRAYS, + .pipe = NVFX_NEW_ARRAYS, .hw = 0, } }; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index b289eef0fc..2d243be16a 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -5,10 +5,11 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_util.h" -#include "nv40_context.h" -#include "nv40_state.h" +#include "nvfx_context.h" +#include "nvfx_state.h" /* TODO (at least...): * 1. Indexed consts + ARL @@ -21,76 +22,62 @@ * 4. bugs */ -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv40_shader.h" +#include "nv30_vertprog.h" +#include "nv40_vertprog.h" -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) +#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) -#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) +struct nvfx_vpc { + struct nvfx_vertex_program *vp; -struct nv40_vpc { - struct nv40_vertex_program *vp; - - struct nv40_vertex_program_exec *vpi; + struct nvfx_vertex_program_exec *vpi; unsigned r_temps; unsigned r_temps_discard; - struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nv40_sreg *r_address; - struct nv40_sreg *r_temp; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_address; + struct nvfx_sreg *r_temp; - struct nv40_sreg *imm; + struct nvfx_sreg *imm; unsigned nr_imm; unsigned hpos_idx; }; -static struct nv40_sreg -temp(struct nv40_vpc *vpc) +static struct nvfx_sreg +temp(struct nvfx_vpc *vpc) { int idx = ffs(~vpc->r_temps) - 1; if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nv40_sr(NV40SR_TEMP, 0); + return nvfx_sr(NVFXSR_TEMP, 0); } vpc->r_temps |= (1 << idx); vpc->r_temps_discard |= (1 << idx); - return nv40_sr(NV40SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } static INLINE void -release_temps(struct nv40_vpc *vpc) +release_temps(struct nvfx_vpc *vpc) { vpc->r_temps &= ~vpc->r_temps_discard; vpc->r_temps_discard = 0; } -static struct nv40_sreg -constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +static struct nvfx_sreg +constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) { - struct nv40_vertex_program *vp = vpc->vp; - struct nv40_vertex_program_data *vpd; + struct nvfx_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program_data *vpd; int idx; if (pipe >= 0) { for (idx = 0; idx < vp->nr_consts; idx++) { if (vp->consts[idx].index == pipe) - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } } @@ -103,70 +90,70 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) vpd->value[1] = y; vpd->value[2] = z; vpd->value[3] = w; - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) { - struct nv40_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program *vp = vpc->vp; uint32_t sr = 0; switch (src.type) { - case NV40SR_TEMP: - sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); + sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); break; - case NV40SR_INPUT: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); break; - case NV40SR_CONST: - sr |= (NV40_VP_SRC_REG_TYPE_CONST << - NV40_VP_SRC_REG_TYPE_SHIFT); + case NVFXSR_CONST: + sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); assert(vpc->vpi->const_index == -1 || vpc->vpi->const_index == src.index); vpc->vpi->const_index = src.index; break; - case NV40SR_NONE: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); break; default: assert(0); } if (src.negate) - sr |= NV40_VP_SRC_NEGATE; + sr |= NVFX_VP(SRC_NEGATE); if (src.abs) hw[0] |= (1 << (21 + pos)); - sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | + (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) | + (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | + (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); switch (pos) { case 0: - hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> - NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << - NV40_VP_INST_SRC0L_SHIFT; + hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> + NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); + hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << + NVFX_VP(INST_SRC0L_SHIFT); break; case 1: - hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); break; case 2: - hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> - NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << - NV40_VP_INST_SRC2L_SHIFT; + hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> + NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); + hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << + NVFX_VP(INST_SRC2L_SHIFT); break; default: assert(0); @@ -174,78 +161,114 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) } static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) { - struct nv40_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program *vp = vpc->vp; switch (dst.type) { - case NV40SR_TEMP: - hw[3] |= NV40_VP_INST_DEST_MASK; - if (slot == 0) { - hw[0] |= (dst.index << - NV40_VP_INST_VEC_DEST_TEMP_SHIFT); - } else { - hw[3] |= (dst.index << - NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + case NVFXSR_TEMP: + if(!nvfx->is_nv4x) + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } } break; - case NV40SR_OUTPUT: + case NVFXSR_OUTPUT: + /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - case NV40_VP_INST_DEST_CLIP(0): + case NVFX_VP_INST_DEST_CLIP(0): vp->or |= (1 << 6); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; - dst.index = NV40_VP_INST_DEST_FOGC; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; + dst.index = NVFX_VP(INST_DEST_FOGC); break; - case NV40_VP_INST_DEST_CLIP(1): + case NVFX_VP_INST_DEST_CLIP(1): vp->or |= (1 << 7); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; - dst.index = NV40_VP_INST_DEST_FOGC; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; + dst.index = NVFX_VP(INST_DEST_FOGC); break; - case NV40_VP_INST_DEST_CLIP(2): + case NVFX_VP_INST_DEST_CLIP(2): vp->or |= (1 << 8); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; - dst.index = NV40_VP_INST_DEST_FOGC; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; + dst.index = NVFX_VP(INST_DEST_FOGC); break; - case NV40_VP_INST_DEST_CLIP(3): + case NVFX_VP_INST_DEST_CLIP(3): vp->or |= (1 << 9); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; - dst.index = NV40_VP_INST_DEST_PSZ; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; + dst.index = NVFX_VP(INST_DEST_PSZ); break; - case NV40_VP_INST_DEST_CLIP(4): + case NVFX_VP_INST_DEST_CLIP(4): vp->or |= (1 << 10); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; - dst.index = NV40_VP_INST_DEST_PSZ; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; + dst.index = NVFX_VP(INST_DEST_PSZ); break; - case NV40_VP_INST_DEST_CLIP(5): + case NVFX_VP_INST_DEST_CLIP(5): vp->or |= (1 << 11); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; - dst.index = NV40_VP_INST_DEST_PSZ; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; + dst.index = NVFX_VP(INST_DEST_PSZ); break; default: + if(!nvfx->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } else { + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } break; } - hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); - if (slot == 0) { - hw[0] |= NV40_VP_INST_VEC_RESULT; - hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + if(!nvfx->is_nv4x) { + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; } else { - hw[3] |= NV40_VP_INST_SCA_RESULT; - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } } break; default: @@ -254,12 +277,12 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) } static void -nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, - struct nv40_sreg s2) +nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, + struct nvfx_sreg s2) { - struct nv40_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program *vp = vpc->vp; uint32_t *hw; vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); @@ -269,35 +292,53 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, hw = vpc->vpi->data; - hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); - - if (slot == 0) { - hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); - } else { - hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); - hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); - hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + hw[0] |= (NVFX_COND_TR << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | + (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | + (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | + (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + + if(!nvfx->is_nv4x) { + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); +// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); + + if (dst.type == NVFXSR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + } else { + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } } - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); + emit_dst(nvfx, vpc, hw, slot, dst); + emit_src(nvfx, vpc, hw, 0, s0); + emit_src(nvfx, vpc, hw, 1, s1); + emit_src(nvfx, vpc, hw, 2, s2); } -static INLINE struct nv40_sreg -tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index); + src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); @@ -322,9 +363,9 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { return src; } -static INLINE struct nv40_sreg -tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv40_sreg dst; +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nvfx_sreg dst; switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: @@ -349,52 +390,19 @@ tgsi_mask(uint tgsi) { int mask = 0; - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; return mask; } static boolean -src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) -{ - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= tgsi_mask(1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(vpc); - - if (mask) - arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, +nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_instruction *finst) { - struct nv40_sreg src[3], dst, tmp; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int mask; int ai = -1, ci = -1, ii = -1; int i; @@ -418,23 +426,12 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(vpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->Register.Index) { ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -445,7 +442,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -456,7 +453,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -474,93 +471,96 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); break; case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); break; case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + arith(vpc, VEC, ARL, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + arith(vpc, VEC, FLR, dst, mask, src[0], none, none); break; case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + arith(vpc, VEC, FRC, dst, mask, src[0], none, none); break; case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + arith(vpc, VEC, MOV, dst, mask, src[0], none, none); break; case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + arith(vpc, SCA, LG2, tmp, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + arith(vpc, VEC, MUL, tmp, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, + arith(vpc, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); break; case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_RET: break; case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); + arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); break; case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); break; case TGSI_OPCODE_XPD: tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, + arith(vpc, VEC, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + arith(vpc, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -574,7 +574,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, } static boolean -nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, +nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_declaration *fdec) { unsigned idx = fdec->Range.First; @@ -582,15 +582,15 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV40_VP_INST_DEST_POS; + hw = NVFX_VP(INST_DEST_POS); vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_COL0; + hw = NVFX_VP(INST_DEST_COL0); } else if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_COL1; + hw = NVFX_VP(INST_DEST_COL1); } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; @@ -598,24 +598,24 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, break; case TGSI_SEMANTIC_BCOLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_BFC0; + hw = NVFX_VP(INST_DEST_BFC0); } else if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_BFC1; + hw = NVFX_VP(INST_DEST_BFC1); } else { NOUVEAU_ERR("bad bcolour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV40_VP_INST_DEST_FOGC; + hw = NVFX_VP(INST_DEST_FOGC); break; case TGSI_SEMANTIC_PSIZE: - hw = NV40_VP_INST_DEST_PSZ; + hw = NVFX_VP(INST_DEST_PSZ); break; case TGSI_SEMANTIC_GENERIC: if (fdec->Semantic.Index <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); + hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index)); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -630,12 +630,12 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } - vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); return TRUE; } static boolean -nv40_vertprog_prepare(struct nv40_vpc *vpc) +nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) { struct tgsi_parse_context p; int high_temp = -1, high_addr = -1, nr_imm = 0, i; @@ -670,7 +670,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) break; #endif case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec)) return FALSE; break; default: @@ -691,7 +691,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) if (fdst->Register.Index > high_addr) high_addr = fdst->Register.Index; } - + } break; #endif @@ -702,18 +702,18 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) tgsi_parse_free(&p); if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); assert(vpc->imm); } if (++high_temp) { - vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) vpc->r_temp[i] = temp(vpc); } if (++high_addr) { - vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); for (i = 0; i < high_addr; i++) vpc->r_address[i] = temp(vpc); } @@ -723,26 +723,26 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) } static void -nv40_vertprog_translate(struct nv40_context *nv40, - struct nv40_vertex_program *vp) +nvfx_vertprog_translate(struct nvfx_context *nvfx, + struct nvfx_vertex_program *vp) { struct tgsi_parse_context parse; - struct nv40_vpc *vpc = NULL; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nvfx_vpc *vpc = NULL; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int i; - vpc = CALLOC(1, sizeof(struct nv40_vpc)); + vpc = CALLOC(1, sizeof(struct nvfx_vpc)); if (!vpc) return; vpc->vp = vp; - if (!nv40_vertprog_prepare(vpc)) { + if (!nvfx_vertprog_prepare(nvfx, vpc)) { FREE(vpc); return; } /* Redirect post-transform vertex position to a temp if user clip - * planes are enabled. We need to append code the the vtxprog + * planes are enabled. We need to append code to the vtxprog * to handle clip planes later. */ if (vp->ucp.nr) { @@ -775,7 +775,7 @@ nv40_vertprog_translate(struct nv40_context *nv40, { const struct tgsi_full_instruction *finst; finst = &parse.FullToken.FullInstruction; - if (!nv40_vertprog_parse_instruction(vpc, finst)) + if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst)) goto out_err; } break; @@ -785,74 +785,74 @@ nv40_vertprog_translate(struct nv40_context *nv40, } /* Write out HPOS if it was redirected to a temp earlier */ - if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { - struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, - NV40_VP_INST_DEST_POS); - struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { + struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP(INST_DEST_POS)); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + arith(vpc, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none); } /* Insert code to handle user clip planes */ for (i = 0; i < vp->ucp.nr; i++) { - struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, - NV40_VP_INST_DEST_CLIP(i)); - struct nv40_sreg ceqn = constant(vpc, -1, - nv40->clip.ucp[i][0], - nv40->clip.ucp[i][1], - nv40->clip.ucp[i][2], - nv40->clip.ucp[i][3]); - struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP_INST_DEST_CLIP(i)); + struct nvfx_sreg ceqn = constant(vpc, -1, + nvfx->clip.ucp[i][0], + nvfx->clip.ucp[i][1], + nvfx->clip.ucp[i][2], + nvfx->clip.ucp[i][3]); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; unsigned mask; switch (i) { - case 0: case 3: mask = MASK_Y; break; - case 1: case 4: mask = MASK_Z; break; - case 2: case 5: mask = MASK_W; break; + case 0: case 3: mask = NVFX_VP_MASK_Y; break; + case 1: case 4: mask = NVFX_VP_MASK_Z; break; + case 2: case 5: mask = NVFX_VP_MASK_W; break; default: NOUVEAU_ERR("invalid clip dist #%d\n", i); goto out_err; } - arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); } - vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; vp->translated = TRUE; out_err: tgsi_parse_free(&parse); if (vpc->r_temp) - FREE(vpc->r_temp); + FREE(vpc->r_temp); if (vpc->r_address) - FREE(vpc->r_address); - if (vpc->imm) - FREE(vpc->imm); + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); FREE(vpc); } static boolean -nv40_vertprog_validate(struct nv40_context *nv40) -{ - struct pipe_screen *pscreen = nv40->pipe.screen; - struct nv40_screen *screen = nv40->screen; +nvfx_vertprog_validate(struct nvfx_context *nvfx) +{ + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - struct nv40_vertex_program *vp; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; - if (nv40->render_mode == HW) { - vp = nv40->vertprog; - constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + if (nvfx->render_mode == HW) { + vp = nvfx->vertprog; + constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - if ((nv40->dirty & NV40_NEW_UCP) || - memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { - nv40_vertprog_destroy(nv40, vp); - memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + if ((nvfx->dirty & NVFX_NEW_UCP) || + memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { + nvfx_vertprog_destroy(nvfx, vp); + memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); } } else { - vp = nv40->swtnl.vertprog; + vp = nvfx->swtnl.vertprog; constbuf = NULL; } @@ -860,24 +860,24 @@ nv40_vertprog_validate(struct nv40_context *nv40) if (vp->translated) goto check_gpu_resources; - nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; - nv40_vertprog_translate(nv40, vp); + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + nvfx_vertprog_translate(nvfx, vp); if (!vp->translated) { - nv40->fallback_swtnl |= NV40_NEW_VERTPROG; - return FALSE; + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + return FALSE; } check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { - struct nouveau_resource *heap = nv40->screen->vp_exec_heap; + struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { while (heap->next && heap->size < vplen) { - struct nv40_vertex_program *evict; - + struct nvfx_vertex_program *evict; + evict = heap->next->priv; nouveau_resource_free(&evict->exec); } @@ -887,12 +887,14 @@ check_gpu_resources: } so = so_new(3, 4, 0); - so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); + so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or); - so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + } + so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); so_data (so, vp->clip_ctrl); so_ref(so, &vp->so); so_ref(NULL, &so); @@ -902,12 +904,12 @@ check_gpu_resources: /* Allocate hw vtxprog const slots */ if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv40->screen->vp_data_heap; + struct nouveau_resource *heap = nvfx->screen->vp_data_heap; if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { while (heap->next && heap->size < vp->nr_consts) { - struct nv40_vertex_program *evict; - + struct nvfx_vertex_program *evict; + evict = heap->next->priv; nouveau_resource_free(&evict->data); } @@ -929,7 +931,7 @@ check_gpu_resources: */ if (vp->exec_start != vp->exec->start) { for (i = 0; i < vp->nr_insns; i++) { - struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; if (vpi->has_branch_offset) { assert(0); @@ -941,13 +943,13 @@ check_gpu_resources: if (vp->nr_consts && vp->data_start != vp->data->start) { for (i = 0; i < vp->nr_insns; i++) { - struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; + vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); vpi->data[1] |= (vpi->const_index + vp->data->start) << - NV40_VP_INST_CONST_SRC_SHIFT; + NVFX_VP(INST_CONST_SRC_SHIFT); } } @@ -965,7 +967,7 @@ check_gpu_resources: } for (i = 0; i < vp->nr_consts; i++) { - struct nv40_vertex_program_data *vpd = &vp->consts[i]; + struct nvfx_vertex_program_data *vpd = &vp->consts[i]; if (vpd->index >= 0) { if (!upload_data && @@ -976,13 +978,13 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); OUT_RING (chan, i + vp->data->start); OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) - pscreen->buffer_unmap(pscreen, constbuf); + pipe_buffer_unmap(pscreen, constbuf); } /* Upload vtxprog */ @@ -995,16 +997,16 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); OUT_RINGp (chan, vp->insns[i].data, 4); } } - if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { - so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); + if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { + so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); return TRUE; } @@ -1012,7 +1014,7 @@ check_gpu_resources: } void -nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) { vp->translated = FALSE; @@ -1038,11 +1040,10 @@ nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) so_ref(NULL, &vp->so); } -struct nv40_state_entry nv40_state_vertprog = { - .validate = nv40_vertprog_validate, +struct nvfx_state_entry nvfx_state_vertprog = { + .validate = nvfx_vertprog_validate, .dirty = { - .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, - .hw = NV40_STATE_VERTPROG, + .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, + .hw = NVFX_STATE_VERTPROG, } }; - diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 61b54af4dd..a6529b2060 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -14,6 +14,7 @@ C_SOURCES = \ r300_query.c \ r300_render.c \ r300_screen.c \ + r300_screen_buffer.c \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 923e1e541f..d994a46ccf 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -24,6 +24,7 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_upload_mgr.h" #include "r300_blit.h" #include "r300_context.h" @@ -34,6 +35,7 @@ #include "r300_screen.h" #include "r300_state_invariant.h" #include "r300_texture.h" +#include "r300_transfer.h" #include "radeon_winsys.h" @@ -54,6 +56,9 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } + u_upload_destroy(r300->upload_vb); + u_upload_destroy(r300->upload_ib); + FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); @@ -71,8 +76,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level) { - return pipe->is_buffer_referenced(pipe, - ((struct r300_texture *)texture)->buffer); + return 0; } static unsigned int @@ -156,16 +160,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); - struct radeon_winsys* radeon_winsys = r300screen->radeon_winsys; + struct r300_winsys_screen *rws = r300screen->rws; if (!r300) return NULL; - r300screen->ctx = (struct pipe_context*)r300; + r300->rws = rws; - r300->winsys = radeon_winsys; - - r300->context.winsys = (struct pipe_winsys*)radeon_winsys; + r300->context.winsys = (struct pipe_winsys*)rws; r300->context.screen = screen; r300->context.priv = priv; @@ -209,16 +211,35 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); - /* r300_init_surface_functions(r300); */ + r300_init_transfer_functions(r300); r300_init_state_functions(r300); r300->invariant_state.dirty = TRUE; - r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); + rws->set_flush_cb(r300->rws, r300_flush_cb, r300); r300->dirty_hw++; r300->blitter = util_blitter_create(&r300->context); + r300->upload_ib = u_upload_create(screen, + 32 * 1024, 16, + PIPE_BUFFER_USAGE_INDEX); + + if (r300->upload_ib == NULL) + goto no_upload_ib; + + r300->upload_vb = u_upload_create(screen, + 128 * 1024, 16, + PIPE_BUFFER_USAGE_VERTEX); + if (r300->upload_vb == NULL) + goto no_upload_vb; + return &r300->context; + + no_upload_ib: + u_upload_destroy(r300->upload_ib); + no_upload_vb: + FREE(r300); + return NULL; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 985e339112..db2f74e074 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -32,6 +32,7 @@ #include "r300_screen.h" +struct u_upload_mgr; struct r300_context; struct r300_fragment_shader; @@ -268,7 +269,7 @@ struct r300_texture { boolean is_npot; /* Pipe buffer backing this texture. */ - struct pipe_buffer* buffer; + struct r300_winsys_buffer *buffer; /* Registers carrying texture format data. */ struct r300_texture_format_state state; @@ -302,7 +303,7 @@ struct r300_context { struct pipe_context context; /* The interface to the windowing system, etc. */ - struct radeon_winsys* winsys; + struct r300_winsys_screen *rws; /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; /* Accelerated blit support. */ @@ -368,6 +369,7 @@ struct r300_context { int vertex_buffer_max_index; /* Vertex elements for Gallium. */ struct r300_vertex_element_state *velems; + bool any_user_vbs; /* Vertex info for Draw. */ struct vertex_info vertex_info; @@ -388,6 +390,9 @@ struct r300_context { uint32_t zbuffer_bpp; /* Whether scissor is enabled. */ boolean scissor_enabled; + /* upload managers */ + struct u_upload_mgr *upload_vb; + struct u_upload_mgr *upload_ib; }; /* Convenience cast wrapper. */ @@ -404,6 +409,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +void r300_init_tex_functions( struct pipe_context *pipe ); static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 151f72b0fe..ad07efbffd 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -51,7 +51,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ - struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ + struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ @@ -105,22 +105,34 @@ cs_count--; \ } while (0) -#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ cs_count -= 3; \ } while (0) -#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + +#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \ + "domains (%d, %d, %d)\n", \ + tex, offset, rd, wd, flags); \ + assert(tex); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \ + cs_count -= 3; \ +} while (0) + + +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ "domains (%d, %d, %d)\n", \ bo, rd, wd, flags); \ assert(bo); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ cs_count -= 2; \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index b881730848..d6177577c8 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,6 +37,7 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit" }, { "tex", DBG_TEX, "Textures" }, { "fall", DBG_FALL, "Fallbacks" }, + { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking purposes only!)" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 55e9217fd3..d8c64dd900 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -32,6 +32,8 @@ #include "r300_emit.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_screen_buffer.h" +#include "r300_state_inlines.h" #include "r300_vs.h" void r300_emit_blend_state(struct r300_context* r300, @@ -415,10 +417,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) assert(tex && tex->buffer && "cbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, tex->fb_state.colorpitch[surf->level], + OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); @@ -434,12 +436,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->fb_state.depthpitch[surf->level], + OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); } @@ -448,7 +450,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) END_CS; } -static void r300_emit_query_start(struct r300_context *r300) +void r300_emit_query_start(struct r300_context *r300) { struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; struct r300_query *query = r300->query_current; @@ -491,13 +493,13 @@ static void r300_emit_query_finish(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), 0, RADEON_GEM_DOMAIN_GTT, 0); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), 0, RADEON_GEM_DOMAIN_GTT, 0); case 2: /* pipe 1 only */ @@ -505,13 +507,13 @@ static void r300_emit_query_finish(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), 0, RADEON_GEM_DOMAIN_GTT, 0); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), 0, RADEON_GEM_DOMAIN_GTT, 0); break; default: @@ -533,7 +535,7 @@ static void rv530_emit_query_single(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -546,10 +548,10 @@ static void rv530_emit_query_double(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -747,7 +749,7 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format[2]); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_RELOC(allstate->textures[i]->buffer, texstate->tile_config, + OUT_CS_TEX_RELOC(allstate->textures[i], texstate->tile_config, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); } } @@ -788,8 +790,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) } for (i = 0; i < aos_count; i++) { - OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0, 0); } END_CS; } @@ -814,7 +816,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } @@ -1009,16 +1011,22 @@ void r300_emit_buffer_validate(struct r300_context *r300, unsigned i; boolean invalid = FALSE; + /* upload buffers first */ + if (r300->any_user_vbs) { + r300_upload_user_buffers(r300); + r300->any_user_vbs = false; + } + /* Clean out BOs. */ - r300->winsys->reset_bos(r300->winsys); + r300->rws->reset_bos(r300->rws); validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, + 0, RADEON_GEM_DOMAIN_VRAM)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1027,8 +1035,8 @@ validate: if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, + 0, RADEON_GEM_DOMAIN_VRAM)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1038,24 +1046,24 @@ validate: tex = texstate->textures[i]; if (!tex || !texstate->sampler_states[i]) continue; - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + if (!r300_add_texture(r300->rws, tex, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } /* ...occlusion query buffer... */ if (r300->dirty_state & R300_NEW_QUERY) { - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { + if (!r300_add_buffer(r300->rws, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } /* ...vertex buffer for SWTCL path... */ if (r300->vbo) { - if (!r300->winsys->add_buffer(r300->winsys, r300->vbo, - RADEON_GEM_DOMAIN_GTT, 0)) { + if (!r300_add_buffer(r300->rws, r300->vbo, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1065,23 +1073,22 @@ validate: for (i = 0; i < r300->velems->count; i++) { pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - if (!r300->winsys->add_buffer(r300->winsys, pbuf, - RADEON_GEM_DOMAIN_GTT, 0)) { - r300->context.flush(&r300->context, 0, NULL); + if (!r300_add_buffer(r300->rws, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); goto validate; } } } /* ...and index buffer for HWTCL path. */ if (index_buffer) { - if (!r300->winsys->add_buffer(r300->winsys, index_buffer, - RADEON_GEM_DOMAIN_GTT, 0)) { + if (!r300_add_buffer(r300->rws, index_buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } - - if (!r300->winsys->validate(r300->winsys)) { + if (!r300->rws->validate(r300->rws)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 449e640a88..7db2fc6a1a 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -57,8 +57,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_query_begin(struct r300_context* r300, - struct r300_query* query); +void r300_emit_query_start(struct r300_context* r300); void r300_emit_query_end(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index a249e8b36b..1c2b252887 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -540,7 +540,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_FIRST_INST(x) ((x) << 0) # define R300_PVS_XYZW_VALID_INST(x) ((x) << 10) # define R300_PVS_LAST_INST(x) ((x) << 20) -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 @@ -1500,6 +1500,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ANISO_THRESHOLD_MASK (7<<17) # define R500_MACRO_SWITCH (1<<22) +# define R500_TX_MAX_ANISO(x) ((x) << 23) +# define R500_TX_MAX_ANISO_MASK (63 << 23) +# define R500_TX_ANISO_HIGH_QUALITY (1 << 30) + # define R500_BORDER_FIX (1<<31) #define R300_TX_FORMAT0_0 0x4480 @@ -1857,7 +1861,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. * There are separate enable bits for writing into temporary registers - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). * diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 9c001ae186..47100c83b0 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -30,10 +30,12 @@ #include "util/u_format.h" #include "util/u_memory.h" +#include "util/u_upload_mgr.h" #include "util/u_prim.h" #include "r300_cs.h" #include "r300_context.h" +#include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" #include "r300_render.h" @@ -123,7 +125,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, static boolean r300_reserve_cs_space(struct r300_context *r300, unsigned dwords) { - if (!r300->winsys->check_cs(r300->winsys, dwords)) { + if (!r300->rws->check_cs(r300->rws, dwords)) { r300->context.flush(&r300->context, 0, NULL); return TRUE; } @@ -131,9 +133,37 @@ static boolean r300_reserve_cs_space(struct r300_context *r300, } static boolean immd_is_good_idea(struct r300_context *r300, - unsigned count) + unsigned count) { - return count <= 4; + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + boolean checked[PIPE_MAX_ATTRIBS] = {0}; + unsigned vertex_element_count = r300->velems->count; + unsigned i, vbi; + + if (count > 4) { + return FALSE; + } + + /* We shouldn't map buffers referenced by CS, busy buffers, + * and ones placed in VRAM. */ + /* XXX Check for VRAM buffers. */ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->velems->velem[i]; + vbi = velem->vertex_buffer_index; + + if (!checked[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + + if (r300_buffer_is_referenced(r300, + vbuf->buffer)) { + /* It's a very bad idea to map it... */ + return FALSE; + } + checked[vbi] = TRUE; + } + } + return TRUE; } static void r300_emit_draw_arrays_immediate(struct r300_context *r300, @@ -274,14 +304,14 @@ static void r300_emit_draw_elements(struct r300_context *r300, #endif CS_LOCALS(r300); - assert((start * indexSize) % 4 == 0); + assert((start * indexSize) % 4 == 0); assert(count < (1 << 24)); + maxIndex = MIN3(maxIndex, r300->vertex_buffer_max_index, count - minIndex); + DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", count, minIndex, maxIndex); - maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); - if (alt_num_verts) { BEGIN_CS(15); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); @@ -316,8 +346,8 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_RELOC(indexBuffer, count_dwords, - RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC(indexBuffer, count_dwords, + RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } @@ -384,12 +414,16 @@ void r300_draw_range_elements(struct pipe_context* pipe, r300_update_derived_state(r300); + r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); + /* 128 dwords for emit_aos and emit_draw_elements */ r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); r300_emit_aos(r300, 0); + u_upload_flush(r300->upload_vb); + u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); @@ -412,7 +446,7 @@ void r300_draw_range_elements(struct pipe_context* pipe, } if (indexBuffer != orgIndexBuffer) { - pipe->screen->buffer_destroy(indexBuffer); + pipe_buffer_reference( &indexBuffer, NULL ); } } @@ -476,6 +510,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } } while (count); } + u_upload_flush(r300->upload_vb); } } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 5880eecd5f..3e31688f8e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -23,14 +23,13 @@ #include "util/u_format.h" #include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "r300_context.h" #include "r300_texture.h" -#include "r300_transfer.h" #include "radeon_winsys.h" -#include "r300_winsys.h" + +#include "r300_screen_buffer.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -255,15 +254,19 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, static void r300_destroy_screen(struct pipe_screen* pscreen) { struct r300_screen* r300screen = r300_screen(pscreen); + struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); + + if (rws) + rws->destroy(rws); FREE(r300screen->caps); FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) +struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) { - struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); - struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); + struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); + struct r300_capabilities *caps = CALLOC_STRUCT(r300_capabilities); if (!r300screen || !caps) { FREE(r300screen); @@ -271,16 +274,16 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) return NULL; } - caps->pci_id = radeon_winsys->pci_id; - caps->num_frag_pipes = radeon_winsys->gb_pipes; - caps->num_z_pipes = radeon_winsys->z_pipes; + caps->pci_id = rws->get_value(rws, R300_VID_PCI_ID); + caps->num_frag_pipes = rws->get_value(rws, R300_VID_GB_PIPES); + caps->num_z_pipes = rws->get_value(rws, R300_VID_Z_PIPES); r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->radeon_winsys = radeon_winsys; - r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; + r300screen->rws = rws; + r300screen->screen.winsys = (struct pipe_winsys*)rws; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; @@ -290,9 +293,13 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) r300screen->screen.context_create = r300_create_context; r300_init_screen_texture_functions(&r300screen->screen); - r300_init_screen_transfer_functions(&r300screen->screen); - u_simple_screen_init(&r300screen->screen); + r300_screen_init_buffer_functions(r300screen); return &r300screen->screen; } +struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen) +{ + return r300_screen(screen)->rws; +} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 484bde6a6b..1ccc0bfb7a 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -36,11 +36,7 @@ struct r300_screen { /* Parent class */ struct pipe_screen screen; - struct radeon_winsys* radeon_winsys; - - /* XXX This hack will be removed once texture transfers become part of - * pipe_context. */ - struct pipe_context* ctx; + struct r300_winsys_screen *rws; /* Chipset capabilities */ struct r300_capabilities* caps; @@ -55,9 +51,6 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } -/* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); - /* Debug functionality. */ /** @@ -77,6 +70,7 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); #define DBG_DRAW 0x0000010 #define DBG_TEX 0x0000020 #define DBG_FALL 0x0000040 +#define DBG_ANISOHQ 0x0000080 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c new file mode 100644 index 0000000000..b97d0d76a4 --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -0,0 +1,314 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + */ +#include <stdio.h> + +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "util/u_math.h" + +#include "r300_screen_buffer.h" + +#include "r300_winsys.h" + +boolean r300_buffer_is_referenced(struct r300_context *r300, + struct pipe_buffer *buf) +{ + struct r300_buffer *rbuf = r300_buffer(buf); + if (r300_buffer_is_user_buffer(buf)) + return FALSE; + + return r300->rws->is_buffer_referenced(r300->rws, rbuf->buf); + +} +int r300_upload_index_buffer(struct r300_context *r300, + struct pipe_buffer **index_buffer, + unsigned index_size, + unsigned start, + unsigned count) +{ + struct pipe_buffer *upload_buffer = NULL; + unsigned index_offset = start * index_size; + int ret = 0; + + if (r300_buffer_is_user_buffer(*index_buffer)) { + ret = u_upload_buffer(r300->upload_ib, + index_offset, + count * index_size, + *index_buffer, + &index_offset, + &upload_buffer); + if (ret) { + goto done; + } + *index_buffer = upload_buffer; + } + done: + // if (upload_buffer) + // pipe_buffer_reference(&upload_buffer, NULL); + return ret; +} + +int r300_upload_user_buffers(struct r300_context *r300) +{ + enum pipe_error ret = PIPE_OK; + int i, nr; + + nr = r300->vertex_buffer_count; + + for (i = 0; i < nr; i++) { + + if (r300_buffer_is_user_buffer(r300->vertex_buffer[i].buffer)) { + struct pipe_buffer *upload_buffer = NULL; + unsigned offset = 0; /*r300->vertex_buffer[i].buffer_offset * 4;*/ + unsigned size = r300->vertex_buffer[i].buffer->size; + unsigned upload_offset; + ret = u_upload_buffer(r300->upload_vb, + offset, size, + r300->vertex_buffer[i].buffer, + &upload_offset, &upload_buffer); + if (ret) + return ret; + + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL); + r300->vertex_buffer[i].buffer = upload_buffer; + r300->vertex_buffer[i].buffer_offset = upload_offset; + } + } + return ret; +} + +static struct r300_winsys_buffer * +r300_winsys_buffer_create(struct r300_screen *r300screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_winsys_buffer *buf; + + buf = rws->buffer_create(rws, alignment, usage, size); + return buf; +} + +static void r300_winsys_buffer_destroy(struct r300_screen *r300screen, + struct r300_buffer *rbuf) +{ + struct r300_winsys_screen *rws = r300screen->rws; + + if (rbuf->buf) { + rws->buffer_reference(rws, &rbuf->buf, NULL); + rbuf->buf = NULL; + } +} + +static struct pipe_buffer *r300_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_buffer *rbuf; + + rbuf = CALLOC_STRUCT(r300_buffer); + if (!rbuf) + goto error1; + + rbuf->magic = R300_BUFFER_MAGIC; + + pipe_reference_init(&rbuf->base.reference, 1); + rbuf->base.screen = screen; + rbuf->base.alignment = alignment; + rbuf->base.usage = usage; + rbuf->base.size = size; + + rbuf->buf = r300_winsys_buffer_create(r300screen, + alignment, + usage, + size); + + if (!rbuf->buf) + goto error2; + + return &rbuf->base; +error2: + FREE(rbuf); +error1: + return NULL; +} + + +static struct pipe_buffer *r300_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct r300_buffer *rbuf; + + rbuf = CALLOC_STRUCT(r300_buffer); + if (!rbuf) + goto no_rbuf; + + rbuf->magic = R300_BUFFER_MAGIC; + + pipe_reference_init(&rbuf->base.reference, 1); + rbuf->base.screen = screen; + rbuf->base.alignment = 1; + rbuf->base.usage = 0; + rbuf->base.size = bytes; + + rbuf->user_buffer = ptr; + return &rbuf->base; + +no_rbuf: + return NULL; +} + +static void r300_buffer_destroy(struct pipe_buffer *buf) +{ + struct r300_screen *r300screen = r300_screen(buf->screen); + struct r300_buffer *rbuf = r300_buffer(buf); + + r300_winsys_buffer_destroy(r300screen, rbuf); + FREE(rbuf); +} + +static void * +r300_buffer_map_range(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length, + unsigned usage ) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + void *map; + int flush = 0; + int i; + + if (rbuf->user_buffer) + return rbuf->user_buffer; + + if (rbuf->base.usage & PIPE_BUFFER_USAGE_CONSTANT) + goto just_map; + + /* check if the mapping is to a range we already flushed */ + if (usage & PIPE_BUFFER_USAGE_DISCARD) { + for (i = 0; i < rbuf->num_ranges; i++) { + + if ((offset >= rbuf->ranges[i].start) && + (offset < rbuf->ranges[i].end)) + flush = 1; + + if (flush) { + /* unreference this hw buffer and allocate a new one */ + rws->buffer_reference(rws, &rbuf->buf, NULL); + + rbuf->num_ranges = 0; + rbuf->map = NULL; + rbuf->buf = r300_winsys_buffer_create(r300screen, + rbuf->base.alignment, + rbuf->base.usage, + rbuf->base.size); + break; + } + } + } +just_map: + map = rws->buffer_map(rws, rbuf->buf, usage | R300_USAGE_FLAG_DONT_SYNC); + + return map; +} + +static void +r300_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, + unsigned length ) +{ + struct r300_buffer *rbuf = r300_buffer(buf); + int i; + + if (rbuf->user_buffer) + return; + + if (rbuf->base.usage & PIPE_BUFFER_USAGE_CONSTANT) + return; + + /* mark the range as used */ + for(i = 0; i < rbuf->num_ranges; ++i) { + if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+length)) { + rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset); + rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length)); + return; + } + } + + rbuf->ranges[rbuf->num_ranges].start = offset; + rbuf->ranges[rbuf->num_ranges].end = offset+length; + rbuf->num_ranges++; +} + +static void * +r300_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + void *map; + + if (rbuf->user_buffer) + return rbuf->user_buffer; + + map = rws->buffer_map(rws, rbuf->buf, usage); + + return map; +} + +static void +r300_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + + if (rbuf->buf) { + rws->buffer_unmap(rws, rbuf->buf); + } +} + +void r300_screen_init_buffer_functions(struct r300_screen *r300screen) +{ + r300screen->screen.buffer_create = r300_buffer_create; + r300screen->screen.user_buffer_create = r300_user_buffer_create; + r300screen->screen.buffer_map = r300_buffer_map; + r300screen->screen.buffer_map_range = r300_buffer_map_range; + r300screen->screen.buffer_flush_mapped_range = r300_buffer_flush_mapped_range; + r300screen->screen.buffer_unmap = r300_buffer_unmap; + r300screen->screen.buffer_destroy = r300_buffer_destroy; +} diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h new file mode 100644 index 0000000000..0cf349c25c --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -0,0 +1,99 @@ +#ifndef R300_SCREEN_BUFFER_H +#define R300_SCREEN_BUFFER_H +#include <stdio.h> +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "r300_screen.h" + +#include "r300_winsys.h" +#include "r300_context.h" + +#define R300_BUFFER_MAGIC 0xabcd1234 + +struct r300_buffer_range { + uint32_t start; + uint32_t end; +}; +#define R300_BUFFER_MAX_RANGES 32 + +struct r300_buffer +{ + struct pipe_buffer base; + + uint32_t magic; + + struct r300_winsys_buffer *buf; + + void *user_buffer; + struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; + unsigned num_ranges; + + void *map; +}; + +static INLINE struct r300_buffer * +r300_buffer(struct pipe_buffer *buffer) +{ + if (buffer) { + assert(((struct r300_buffer *)buffer)->magic == R300_BUFFER_MAGIC); + return (struct r300_buffer *)buffer; + } + return NULL; +} + +static INLINE boolean +r300_buffer_is_user_buffer(struct pipe_buffer *buffer) +{ + return r300_buffer(buffer)->user_buffer ? true : false; +} + +static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws, + struct pipe_buffer *buffer, + int rd, int wr) +{ + struct r300_buffer *buf = r300_buffer(buffer); + + if (!buf->buf) + return true; + + return rws->add_buffer(rws, buf->buf, rd, wr); +} + + +static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, + struct r300_texture *tex, + int rd, int wr) +{ + return rws->add_buffer(rws, tex->buffer, rd, wr); +} + +void r300_screen_init_buffer_functions(struct r300_screen *r300screen); + +static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, + struct r300_buffer *buf, + uint32_t rd, uint32_t wd, uint32_t flags) +{ + if (!buf->buf) + return; + + rws->write_cs_reloc(rws, buf->buf, rd, wd, flags); +} + +static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws, + struct r300_texture *texture, + uint32_t rd, uint32_t wd, uint32_t flags) +{ + rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags); +} + +int r300_upload_user_buffers(struct r300_context *r300); + +int r300_upload_index_buffer(struct r300_context *r300, + struct pipe_buffer **index_buffer, + unsigned index_size, + unsigned start, + unsigned count); + +boolean r300_buffer_is_referenced(struct r300_context *r300, + struct pipe_buffer *buf); +#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bd4c2766cb..712e9280e3 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -34,6 +34,7 @@ #include "r300_context.h" #include "r300_reg.h" #include "r300_screen.h" +#include "r300_screen_buffer.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_vs.h" @@ -525,7 +526,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)old_state->cbufs[i]->texture; if (tex) { - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[0], tex->microtile != 0, tex->macrotile != 0); @@ -537,7 +538,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)old_state->zsbuf->texture; if (tex) { - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[0], tex->microtile != 0, tex->macrotile != 0); @@ -549,7 +550,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)new_state->cbufs[i]->texture; level = new_state->cbufs[i]->level; - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[level], tex->microtile != 0, tex->mip_macrotile[level] != 0); @@ -558,7 +559,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)new_state->zsbuf->texture; level = new_state->zsbuf->level; - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[level], tex->microtile != 0, tex->mip_macrotile[level] != 0); @@ -849,6 +850,7 @@ static void* { struct r300_context* r300 = r300_context(pipe); struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); + boolean is_r500 = r300_screen(pipe->screen)->caps->is_r500; int lod_bias; union util_color uc; @@ -864,6 +866,8 @@ static void* state->min_mip_filter, state->max_anisotropy > 0); + sampler->filter0 |= r300_anisotropy(state->max_anisotropy); + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ /* We must pass these to the merge function to clamp them properly. */ sampler->min_lod = MAX2((unsigned)state->min_lod, 0); @@ -873,7 +877,13 @@ static void* sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; - sampler->filter1 |= r300_anisotropy(state->max_anisotropy); + /* This is very high quality anisotropic filtering for R5xx. + * It's good for benchmarking the performance of texturing but + * in practice we don't want to slow down the driver because it's + * a pretty good performance killer. Feel free to play with it. */ + if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) { + sampler->filter1 |= r500_anisotropy(state->max_anisotropy); + } util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); sampler->border_color = uc.ui; @@ -1031,17 +1041,30 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); - unsigned i, max_index = ~0; + int i; + unsigned max_index = (1 << 24) - 1; + boolean any_user_buffer = false; - memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); + if (count == r300->vertex_buffer_count && + memcmp(r300->vertex_buffer, buffers, count * sizeof(buffers[0])) == 0) + return; for (i = 0; i < count; i++) { + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); + if (r300_buffer_is_user_buffer(buffers[i].buffer)) + any_user_buffer = true; max_index = MIN2(buffers[i].max_index, max_index); } + for ( ; i < r300->vertex_buffer_count; i++) + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL); + + memcpy(r300->vertex_buffer, buffers, + sizeof(struct pipe_vertex_buffer) * count); + r300->vertex_buffer_count = count; r300->vertex_buffer_max_index = max_index; + r300->any_user_vbs = any_user_buffer; if (r300->draw) { draw_flush(r300->draw); diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index a32924ed0a..8485d4f8f9 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -327,6 +327,18 @@ static INLINE uint32_t r300_anisotropy(unsigned max_aniso) } } +static INLINE uint32_t r500_anisotropy(unsigned max_aniso) +{ + if (!max_aniso) { + return 0; + } + max_aniso -= 1; + + // Map the range [0, 15] to [0, 63]. + return R500_TX_MAX_ANISO(MIN2((unsigned)(max_aniso*4.2001), 63)) | + R500_TX_ANISO_HIGH_QUALITY;; +} + /* Non-CSO state. (For now.) */ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 04124afd4d..7c7656068b 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -773,7 +773,7 @@ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); struct r300_screen* rscreen = r300_screen(screen); - struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; if (!tex) { return NULL; @@ -790,13 +790,13 @@ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, r300_setup_miptree(rscreen, tex); r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 2048, - PIPE_BUFFER_USAGE_PIXEL, - tex->size); - winsys->buffer_set_tiling(winsys, tex->buffer, - tex->pitch[0], - tex->microtile != R300_BUFFER_LINEAR, - tex->macrotile != R300_BUFFER_LINEAR); + tex->buffer = rws->buffer_create(rws, 2048, + PIPE_BUFFER_USAGE_PIXEL, + tex->size); + rws->buffer_set_tiling(rws, tex->buffer, + tex->pitch[0], + tex->microtile != R300_BUFFER_LINEAR, + tex->macrotile != R300_BUFFER_LINEAR); if (!tex->buffer) { FREE(tex); @@ -809,9 +809,9 @@ static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, static void r300_texture_destroy(struct pipe_texture* texture) { struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; - pipe_buffer_reference(&tex->buffer, NULL); - + rws->buffer_reference(rws, &tex->buffer, NULL); FREE(tex); } @@ -857,9 +857,9 @@ static struct pipe_texture* const struct pipe_texture* base, struct winsys_handle *whandle) { - struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen* rscreen = r300_screen(screen); - struct pipe_buffer *buffer; + struct r300_winsys_buffer *buffer; struct r300_texture* tex; unsigned stride; @@ -870,7 +870,7 @@ static struct pipe_texture* return NULL; } - buffer = winsys->buffer_from_handle(winsys, screen, whandle, &stride); + buffer = rws->buffer_from_handle(rws, screen, whandle, &stride); if (!buffer) { return NULL; } @@ -901,7 +901,7 @@ static boolean struct pipe_texture *texture, struct winsys_handle *whandle) { - struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; struct r300_texture* tex = (struct r300_texture*)texture; unsigned stride; @@ -911,7 +911,7 @@ static boolean stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - winsys->buffer_get_handle(winsys, tex->buffer, stride, whandle); + rws->buffer_get_handle(rws, tex->buffer, stride, whandle); return TRUE; } @@ -977,3 +977,25 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) screen->video_surface_destroy= r300_video_surface_destroy; } +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, + struct r300_winsys_buffer** buffer, + unsigned* stride) +{ + struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; + struct r300_winsys_buffer *buf; + + if (!tex) { + return FALSE; + } + + rws->buffer_reference(rws, &buf, tex->buffer); + + if (stride) { + *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); + } + + *buffer = buf; + return TRUE; +} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 138b62784e..60c7fa8342 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -63,8 +63,8 @@ r300_video_surface(struct pipe_video_surface *pvs) /* Used internally for texture_is_referenced() */ boolean r300_get_texture_buffer(struct pipe_screen* screen, - struct pipe_texture* texture, - struct pipe_buffer** buffer, + struct pipe_texture *texture, + struct r300_winsys_buffer** buffer, unsigned* stride); #endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index ec89681a3c..987a040698 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -26,6 +26,8 @@ #include "r300_texture.h" #include "r300_screen.h" +#include "r300_winsys.h" + #include "util/u_memory.h" #include "util/u_format.h" @@ -118,15 +120,15 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, } static struct pipe_transfer* -r300_get_tex_transfer(struct pipe_screen *screen, +r300_get_tex_transfer(struct pipe_context *ctx, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, unsigned w, unsigned h) { struct r300_texture *tex = (struct r300_texture *)texture; + struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; - struct r300_screen *r300screen = r300_screen(screen); struct pipe_texture template; trans = CALLOC_STRUCT(r300_transfer); @@ -136,7 +138,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, trans->transfer.usage = usage; trans->transfer.width = w; trans->transfer.height = h; - trans->ctx = r300screen->ctx; + trans->ctx = ctx; trans->x = x; trans->y = y; trans->level = level; @@ -174,8 +176,10 @@ r300_get_tex_transfer(struct pipe_screen *screen, } /* Create the temporary texture. */ - trans->detiled_texture = - (struct r300_texture*)screen->texture_create(screen, &template); + trans->detiled_texture = (struct r300_texture*) + ctx->screen->texture_create(ctx->screen, + &template); + assert(!trans->detiled_texture->microtile && !trans->detiled_texture->macrotile); @@ -187,7 +191,7 @@ r300_get_tex_transfer(struct pipe_screen *screen, if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ - r300_copy_from_tiled_texture(r300screen->ctx, trans); + r300_copy_from_tiled_texture(ctx, trans); } } else { trans->transfer.x = x; @@ -203,7 +207,8 @@ r300_get_tex_transfer(struct pipe_screen *screen, return &trans->transfer; } -static void r300_tex_transfer_destroy(struct pipe_transfer *trans) +static void r300_tex_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans) { struct r300_transfer *r300transfer = r300_transfer(trans); @@ -219,9 +224,10 @@ static void r300_tex_transfer_destroy(struct pipe_transfer *trans) FREE(trans); } -static void* r300_transfer_map(struct pipe_screen *screen, +static void* r300_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) { + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = (struct r300_texture*)transfer->texture; char *map; @@ -230,12 +236,12 @@ static void* r300_transfer_map(struct pipe_screen *screen, if (r300transfer->detiled_texture) { /* The detiled texture is of the same size as the region being mapped * (no offset needed). */ - return pipe_buffer_map(screen, + return rws->buffer_map(rws, r300transfer->detiled_texture->buffer, pipe_transfer_buffer_flags(transfer)); } else { /* Tiling is disabled. */ - map = pipe_buffer_map(screen, tex->buffer, + map = rws->buffer_map(rws, tex->buffer, pipe_transfer_buffer_flags(transfer)); if (!map) { @@ -248,23 +254,27 @@ static void* r300_transfer_map(struct pipe_screen *screen, } } -static void r300_transfer_unmap(struct pipe_screen *screen, +static void r300_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; struct r300_transfer *r300transfer = r300_transfer(transfer); struct r300_texture *tex = (struct r300_texture*)transfer->texture; if (r300transfer->detiled_texture) { - pipe_buffer_unmap(screen, r300transfer->detiled_texture->buffer); + rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); } else { - pipe_buffer_unmap(screen, tex->buffer); + rws->buffer_unmap(rws, tex->buffer); } } -void r300_init_screen_transfer_functions(struct pipe_screen *screen) + +void r300_init_transfer_functions( struct r300_context *r300ctx ) { - screen->get_tex_transfer = r300_get_tex_transfer; - screen->tex_transfer_destroy = r300_tex_transfer_destroy; - screen->transfer_map = r300_transfer_map; - screen->transfer_unmap = r300_transfer_unmap; + struct pipe_context *ctx = &r300ctx->context; + + ctx->get_tex_transfer = r300_get_tex_transfer; + ctx->tex_transfer_destroy = r300_tex_transfer_destroy; + ctx->transfer_map = r300_transfer_map; + ctx->transfer_unmap = r300_transfer_unmap; } diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h index 60d1d3dc85..79baf6d048 100644 --- a/src/gallium/drivers/r300/r300_transfer.h +++ b/src/gallium/drivers/r300/r300_transfer.h @@ -26,6 +26,8 @@ #include "pipe/p_screen.h" -void r300_init_screen_transfer_functions(struct pipe_screen *screen); +struct r300_context; + +void r300_init_transfer_functions(struct r300_context *r300ctx); #endif diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 379939ac75..bd6b95dccb 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -34,8 +34,6 @@ #include "radeon_compiler.h" -#include "util/u_math.h" - /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index ddf2b79003..e5183a8239 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -23,10 +23,6 @@ #ifndef R300_WINSYS_H #define R300_WINSYS_H -#ifdef __cplusplus -extern "C" { -#endif - /* The public interface header for the r300 pipe driver. * Any winsys hosting this pipe needs to implement r300_winsys and then * call r300_create_screen to start things. */ @@ -34,14 +30,146 @@ extern "C" { #include "pipe/p_defines.h" #include "pipe/p_state.h" -struct radeon_winsys; +struct r300_winsys_screen; /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws); + +struct r300_winsys_buffer; + + +boolean r300_get_texture_buffer(struct pipe_screen* screen, + struct pipe_texture* texture, + struct r300_winsys_buffer** buffer, + unsigned *stride); + +enum r300_value_id { + R300_VID_PCI_ID, + R300_VID_GB_PIPES, + R300_VID_Z_PIPES, +}; + +#define R300_USAGE_FLAG_DONT_SYNC (1 << 17) + +struct r300_winsys_screen { + void (*destroy)(struct r300_winsys_screen *ws); + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + unsigned alignment, + unsigned usage, + unsigned size); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + unsigned usage); + + void (*buffer_unmap)( struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf ); + + void (*buffer_destroy)( struct r300_winsys_buffer *buf ); + + + void (*buffer_reference)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer **pdst, + struct r300_winsys_buffer *src); + + boolean (*buffer_references)(struct r300_winsys_buffer *a, + struct r300_winsys_buffer *b); + + void (*buffer_flush_range)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf, + unsigned offset, + unsigned length); + + /* Add a pipe_buffer to the list of buffer objects to validate. */ + boolean (*add_buffer)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buf, + uint32_t rd, + uint32_t wd); + + + /* Revalidate all currently setup pipe_buffers. + * Returns TRUE if a flush is required. */ + boolean (*validate)(struct r300_winsys_screen* winsys); + + /* Check to see if there's room for commands. */ + boolean (*check_cs)(struct r300_winsys_screen* winsys, int size); + + /* Start a command emit. */ + void (*begin_cs)(struct r300_winsys_screen* winsys, + int size, + const char* file, + const char* function, + int line); + + /* Write a dword to the command buffer. */ + void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + + /* Write a relocated dword to the command buffer. */ + void (*write_cs_reloc)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buf, + uint32_t rd, + uint32_t wd, + uint32_t flags); + + /* Finish a command emit. */ + void (*end_cs)(struct r300_winsys_screen* winsys, + const char* file, + const char* function, + int line); + + /* Flush the CS. */ + void (*flush_cs)(struct r300_winsys_screen* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct r300_winsys_screen *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct r300_winsys_screen *winsys); + + void (*buffer_set_tiling)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer, + uint32_t pitch, + boolean microtiled, + boolean macrotiled); + + uint32_t (*get_value)(struct r300_winsys_screen *winsys, + enum r300_value_id vid); + + struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, + struct pipe_screen *screen, + struct winsys_handle *whandle, + unsigned *stride); + boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer, + unsigned stride, + struct winsys_handle *whandle); + + boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer); + +}; -#ifdef __cplusplus -} -#endif +struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen); #endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 30494719f7..de92a0cd2c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -44,6 +44,7 @@ #include "sp_surface.h" #include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" +#include "sp_texture.h" #include "sp_query.h" @@ -275,6 +276,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced; softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( &softpipe->pipe ); softpipe->pipe.render_condition = softpipe_render_condition; @@ -283,13 +285,13 @@ softpipe_create_context( struct pipe_screen *screen, * Must be before quad stage setup! */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); - softpipe->zsbuf_cache = sp_create_tile_cache( screen ); + softpipe->cbuf_cache[i] = sp_create_tile_cache( &softpipe->pipe ); + softpipe->zsbuf_cache = sp_create_tile_cache( &softpipe->pipe ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); + softpipe->tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen); + softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); } /* setup quad rendering stages */ diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e8952bf4fb..3d76af4d8c 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -93,9 +93,9 @@ softpipe_flush( struct pipe_context *pipe, static unsigned frame_no = 1; static char filename[256]; util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); + debug_dump_surface_bmp(softpipe, filename, softpipe->framebuffer.cbufs[0]); util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); + debug_dump_surface_bmp(softpipe, filename, softpipe->framebuffer.zsbuf); ++frame_no; } #endif diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index d62bfa3d63..757dc86128 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -173,8 +173,10 @@ softpipe_is_format_supported( struct pipe_screen *screen, break; } - if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if(!winsys->is_displaytarget_format_supported(winsys, format)) + if(tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { + if(!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) return FALSE; } diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index a0b95c8884..e3a5e37ce4 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -43,14 +43,14 @@ struct softpipe_tex_tile_cache * -sp_create_tex_tile_cache( struct pipe_screen *screen ) +sp_create_tex_tile_cache( struct pipe_context *pipe ) { struct softpipe_tex_tile_cache *tc; uint pos; tc = CALLOC_STRUCT( softpipe_tex_tile_cache ); if (tc) { - tc->screen = screen; + tc->pipe = pipe; for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].addr.bits.invalid = 1; } @@ -63,19 +63,16 @@ sp_create_tex_tile_cache( struct pipe_screen *screen ) void sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc) { - struct pipe_screen *screen; uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ } if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->transfer); } if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans); } FREE( tc ); @@ -88,7 +85,7 @@ void sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc) { if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); + tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); } @@ -96,7 +93,7 @@ void sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc) { if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans); tc->tex_trans_map = NULL; } } @@ -133,14 +130,12 @@ sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc, pipe_texture_reference(&tc->texture, texture); if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); + tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans); tc->tex_trans_map = NULL; } - screen->tex_transfer_destroy(tc->tex_trans); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans); tc->tex_trans = NULL; } @@ -204,7 +199,6 @@ const struct softpipe_tex_cached_tile * sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, union tex_tile_address addr ) { - struct pipe_screen *screen = tc->screen; struct softpipe_tex_cached_tile *tile; tile = tc->entries + tex_cache_pos( addr ); @@ -232,16 +226,16 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, if (tc->tex_trans) { if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans); tc->tex_trans_map = NULL; } - screen->tex_transfer_destroy(tc->tex_trans); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans); tc->tex_trans = NULL; } tc->tex_trans = - screen->get_tex_transfer(screen, tc->texture, + tc->pipe->get_tex_transfer(tc->pipe, tc->texture, addr.bits.face, addr.bits.level, addr.bits.z, @@ -249,7 +243,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, u_minify(tc->texture->width0, addr.bits.level), u_minify(tc->texture->height0, addr.bits.level)); - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); + tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); tc->tex_face = addr.bits.face; tc->tex_level = addr.bits.level; @@ -257,7 +251,8 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } /* get tile from the transfer (view into texture) */ - pipe_get_tile_rgba(tc->tex_trans, + pipe_get_tile_rgba(tc->pipe, + tc->tex_trans, addr.bits.x * TILE_SIZE, addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index ac6886a3df..b116397258 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -70,7 +70,7 @@ struct softpipe_tex_cached_tile struct softpipe_tex_tile_cache { - struct pipe_screen *screen; + struct pipe_context *pipe; struct pipe_transfer *transfer; void *transfer_map; @@ -88,7 +88,7 @@ struct softpipe_tex_tile_cache extern struct softpipe_tex_tile_cache * -sp_create_tex_tile_cache( struct pipe_screen *screen ); +sp_create_tex_tile_cache( struct pipe_context *pipe ); extern void sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 11d184effb..f4983b7c8e 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -36,7 +36,6 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "sp_context.h" #include "sp_texture.h" @@ -92,6 +91,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, /* Round up the surface size to a multiple of the tile size? */ spt->dt = winsys->displaytarget_create(winsys, + spt->base.tex_usage, spt->base.format, spt->base.width0, spt->base.height0, @@ -140,8 +140,6 @@ softpipe_texture_create(struct pipe_screen *screen, } - - static void softpipe_texture_destroy(struct pipe_texture *pt) { @@ -162,6 +160,55 @@ softpipe_texture_destroy(struct pipe_texture *pt) } +static struct pipe_texture * +softpipe_texture_from_handle(struct pipe_screen *screen, + const struct pipe_texture *template, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *template; + pipe_reference_init(&spt->base.reference, 1); + spt->base.screen = screen; + + spt->pot = (util_is_power_of_two(template->width0) && + util_is_power_of_two(template->height0) && + util_is_power_of_two(template->depth0)); + + spt->dt = winsys->displaytarget_from_handle(winsys, + template, + whandle, + &spt->stride[0]); + if (!spt->dt) + goto fail; + + return &spt->base; + + fail: + FREE(spt); + return NULL; +} + + +static boolean +softpipe_texture_get_handle(struct pipe_screen *screen, + struct pipe_texture *pt, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + struct softpipe_texture *spt = softpipe_texture(pt); + + assert(spt->dt); + if (!spt->dt) + return FALSE; + + return winsys->displaytarget_get_handle(winsys, spt->dt, whandle); +} + + /** * Get a pipe_surface "view" into a texture. */ @@ -256,7 +303,7 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf) * \param height height of region to read/write */ static struct pipe_transfer * -softpipe_get_tex_transfer(struct pipe_screen *screen, +softpipe_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, @@ -310,7 +357,8 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, * softpipe_get_tex_transfer(). */ static void -softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) +softpipe_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is @@ -326,7 +374,7 @@ softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) * Create memory mapping for given pipe_transfer object. */ static void * -softpipe_transfer_map( struct pipe_screen *screen, +softpipe_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { ubyte *map, *xfer_map; @@ -339,7 +387,7 @@ softpipe_transfer_map( struct pipe_screen *screen, if (spt->dt) { /* display target */ - struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; map = winsys->displaytarget_map(winsys, spt->dt, pipe_transfer_buffer_flags(transfer)); @@ -359,7 +407,7 @@ softpipe_transfer_map( struct pipe_screen *screen, /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ - softpipe_screen(screen)->timestamp++; + softpipe_screen(pipe->screen)->timestamp++; } xfer_map = map + softpipe_transfer(transfer)->offset + @@ -374,7 +422,7 @@ softpipe_transfer_map( struct pipe_screen *screen, * Unmap memory mapping for given pipe_transfer object. */ static void -softpipe_transfer_unmap(struct pipe_screen *screen, +softpipe_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct softpipe_texture *spt; @@ -384,7 +432,7 @@ softpipe_transfer_unmap(struct pipe_screen *screen, if (spt->dt) { /* display target */ - struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; winsys->displaytarget_unmap(winsys, spt->dt); } @@ -448,19 +496,25 @@ softpipe_video_surface_destroy(struct pipe_video_surface *vsfc) void +softpipe_init_texture_funcs(struct pipe_context *pipe) +{ + pipe->get_tex_transfer = softpipe_get_tex_transfer; + pipe->tex_transfer_destroy = softpipe_tex_transfer_destroy; + pipe->transfer_map = softpipe_transfer_map; + pipe->transfer_unmap = softpipe_transfer_unmap; +} + +void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; screen->texture_destroy = softpipe_texture_destroy; + screen->texture_from_handle = softpipe_texture_from_handle; + screen->texture_get_handle = softpipe_texture_get_handle; screen->get_tex_surface = softpipe_get_tex_surface; screen->tex_surface_destroy = softpipe_tex_surface_destroy; - screen->get_tex_transfer = softpipe_get_tex_transfer; - screen->tex_transfer_destroy = softpipe_tex_transfer_destroy; - screen->transfer_map = softpipe_transfer_map; - screen->transfer_unmap = softpipe_transfer_unmap; - screen->video_surface_create = softpipe_video_surface_create; screen->video_surface_destroy = softpipe_video_surface_destroy; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 1c8636d1d5..c0e6ba8a86 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -107,5 +107,8 @@ softpipe_video_surface(struct pipe_video_surface *pvs) extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); +void +softpipe_init_texture_funcs(struct pipe_context *pipe); + #endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index aedfdf1b46..1c3c2667d7 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -79,20 +79,20 @@ clear_clear_flag(uint *bitvec, union tile_address addr) struct softpipe_tile_cache * -sp_create_tile_cache( struct pipe_screen *screen ) +sp_create_tile_cache( struct pipe_context *pipe ) { struct softpipe_tile_cache *tc; uint pos; int maxLevels, maxTexSize; /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ - maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxLevels = pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); maxTexSize = 1 << (maxLevels - 1); assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { - tc->screen = screen; + tc->pipe = pipe; for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].addr.bits.invalid = 1; } @@ -115,15 +115,13 @@ sp_create_tile_cache( struct pipe_screen *screen ) void sp_destroy_tile_cache(struct softpipe_tile_cache *tc) { - struct pipe_screen *screen; uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ } if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->transfer); } FREE( tc ); @@ -137,27 +135,25 @@ void sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, struct pipe_surface *ps) { - if (tc->transfer) { - struct pipe_screen *screen = tc->transfer->texture->screen; + struct pipe_context *pipe = tc->pipe; + if (tc->transfer) { if (ps == tc->surface) return; if (tc->transfer_map) { - screen->transfer_unmap(screen, tc->transfer); + pipe->transfer_unmap(pipe, tc->transfer); tc->transfer_map = NULL; } - screen->tex_transfer_destroy(tc->transfer); + pipe->tex_transfer_destroy(pipe, tc->transfer); tc->transfer = NULL; } tc->surface = ps; if (ps) { - struct pipe_screen *screen = ps->texture->screen; - - tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, + tc->transfer = pipe->get_tex_transfer(pipe, ps->texture, ps->face, ps->level, ps->zslice, PIPE_TRANSFER_READ_WRITE, 0, 0, ps->width, ps->height); @@ -187,7 +183,7 @@ void sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc) { if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); + tc->transfer_map = tc->pipe->transfer_map(tc->pipe, tc->transfer); } @@ -195,7 +191,7 @@ void sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) { if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); + tc->pipe->transfer_unmap(tc->pipe, tc->transfer); tc->transfer_map = NULL; } } @@ -295,7 +291,8 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) union tile_address addr = tile_address(x, y); if (is_clear_flag_set(tc->clear_flags, addr)) { - pipe_put_tile_raw(pt, + pipe_put_tile_raw(tc->pipe, + pt, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); @@ -329,14 +326,14 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) struct softpipe_cached_tile *tile = tc->entries + pos; if (!tile->addr.bits.invalid) { if (tc->depth_stencil) { - pipe_put_tile_raw(pt, + pipe_put_tile_raw(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pt, + pipe_put_tile_rgba(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, @@ -379,14 +376,14 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, if (tile->addr.bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { - pipe_put_tile_raw(pt, + pipe_put_tile_raw(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pt, + pipe_put_tile_rgba(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, @@ -409,14 +406,14 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, else { /* get new tile data from transfer */ if (tc->depth_stencil) { - pipe_get_tile_raw(pt, + pipe_get_tile_raw(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_get_tile_rgba(pt, + pipe_get_tile_rgba(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index a12092702a..753d8c0daa 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -80,7 +80,7 @@ struct softpipe_cached_tile struct softpipe_tile_cache { - struct pipe_screen *screen; + struct pipe_context *pipe; struct pipe_surface *surface; /**< the surface we're caching */ struct pipe_transfer *transfer; void *transfer_map; @@ -98,7 +98,7 @@ struct softpipe_tile_cache extern struct softpipe_tile_cache * -sp_create_tile_cache( struct pipe_screen *screen ); +sp_create_tile_cache( struct pipe_context *pipe ); extern void sp_destroy_tile_cache(struct softpipe_tile_cache *tc); diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index d499ae6acc..adb7840182 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -164,6 +164,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga_init_constbuffer_functions(svga); svga_init_query_functions(svga); + svga_init_texture_functions(&svga->pipe); + /* debug */ svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 5b581debfc..4a058eda88 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -41,8 +41,6 @@ #include "svga_debug.h" #include "svga_screen_buffer.h" -#include <util/u_string.h> - /* XXX: This isn't a real hardware flag, but just a hack for kernel to * know about primary surfaces. Find a better way to accomplish this. @@ -783,15 +781,17 @@ svga_surface_needs_propagation(struct pipe_surface *surf) return s->dirty && s->handle != tex->handle; } - +/* XXX: Still implementing this as if it was a screen function, but + * can now modify it to queue transfers on the context. + */ static struct pipe_transfer * -svga_get_tex_transfer(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, unsigned x, unsigned y, - unsigned w, unsigned h) +svga_get_tex_transfer(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st; unsigned nblocksx = util_format_get_nblocksx(texture->format, w); @@ -859,11 +859,14 @@ no_hwbuf: } +/* XXX: Still implementing this as if it was a screen function, but + * can now modify it to queue transfers on the context. + */ static void * -svga_transfer_map( struct pipe_screen *screen, +svga_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); @@ -877,11 +880,14 @@ svga_transfer_map( struct pipe_screen *screen, } +/* XXX: Still implementing this as if it was a screen function, but + * can now modify it to queue transfers on the context. + */ static void -svga_transfer_unmap(struct pipe_screen *screen, +svga_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); @@ -891,10 +897,11 @@ svga_transfer_unmap(struct pipe_screen *screen, static void -svga_tex_transfer_destroy(struct pipe_transfer *transfer) +svga_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { struct svga_texture *tex = svga_texture(transfer->texture); - struct svga_screen *ss = svga_screen(transfer->texture->screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); @@ -911,6 +918,17 @@ svga_tex_transfer_destroy(struct pipe_transfer *transfer) FREE(st); } + +void +svga_init_texture_functions(struct pipe_context *pipe) +{ + pipe->get_tex_transfer = svga_get_tex_transfer; + pipe->transfer_map = svga_transfer_map; + pipe->transfer_unmap = svga_transfer_unmap; + pipe->tex_transfer_destroy = svga_tex_transfer_destroy; +} + + void svga_screen_init_texture_functions(struct pipe_screen *screen) { @@ -920,10 +938,6 @@ svga_screen_init_texture_functions(struct pipe_screen *screen) screen->texture_destroy = svga_texture_destroy; screen->get_tex_surface = svga_get_tex_surface; screen->tex_surface_destroy = svga_tex_surface_destroy; - screen->get_tex_transfer = svga_get_tex_transfer; - screen->transfer_map = svga_transfer_map; - screen->transfer_unmap = svga_transfer_unmap; - screen->tex_transfer_destroy = svga_tex_transfer_destroy; } /*********************************************************************** diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index ca6602b436..96d035b12d 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -186,6 +186,9 @@ svga_surface_needs_propagation(struct pipe_surface *surf); extern void svga_screen_init_texture_functions(struct pipe_screen *screen); +void +svga_init_texture_functions(struct pipe_context *pipe); + enum SVGA3dSurfaceFormat svga_translate_format(enum pipe_format format); diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 133521f45e..b7e6bbac68 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -27,7 +27,9 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_format.h" +#include "pipe/p_format.h" #include "pipe/p_screen.h" #include "tr_dump.h" @@ -1283,6 +1285,136 @@ trace_is_buffer_referenced( struct pipe_context *_pipe, return referenced; } + +/******************************************************************** + * transfer + */ + + +static struct pipe_transfer * +trace_context_get_tex_transfer(struct pipe_context *_context, + struct pipe_texture *_texture, + unsigned face, unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_texture *tr_tex = trace_texture(_texture); + struct pipe_context *context = tr_context->pipe; + struct pipe_texture *texture = tr_tex->texture; + struct pipe_transfer *result = NULL; + + assert(texture->screen == context->screen); + + trace_dump_call_begin("pipe_context", "get_tex_transfer"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, texture); + trace_dump_arg(uint, face); + trace_dump_arg(uint, level); + trace_dump_arg(uint, zslice); + trace_dump_arg(uint, usage); + + trace_dump_arg(uint, x); + trace_dump_arg(uint, y); + trace_dump_arg(uint, w); + trace_dump_arg(uint, h); + + result = context->get_tex_transfer(context, texture, face, level, zslice, usage, + x, y, w, h); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + if (result) + result = trace_transfer_create(tr_context, tr_tex, result); + + return result; +} + + +static void +trace_context_tex_transfer_destroy(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_transfer *tr_trans = trace_transfer(_transfer); + struct pipe_context *context = tr_context->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; + + trace_dump_call_begin("pipe_context", "tex_transfer_destroy"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, transfer); + + trace_dump_call_end(); + + trace_transfer_destroy(tr_context, tr_trans); +} + + +static void * +trace_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_transfer *tr_trans = trace_transfer(_transfer); + struct pipe_context *context = tr_context->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; + void *map; + + map = context->transfer_map(context, transfer); + if(map) { + if(transfer->usage & PIPE_TRANSFER_WRITE) { + assert(!tr_trans->map); + tr_trans->map = map; + } + } + + return map; +} + + +static void +trace_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct trace_context *tr_ctx = trace_context(_context); + struct trace_transfer *tr_trans = trace_transfer(_transfer); + struct pipe_context *context = tr_ctx->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; + + if(tr_trans->map) { + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; + + trace_dump_call_begin("pipe_context", "transfer_write"); + + trace_dump_arg(ptr, context); + + trace_dump_arg(ptr, transfer); + + trace_dump_arg_begin("stride"); + trace_dump_uint(transfer->stride); + trace_dump_arg_end(); + + trace_dump_arg_begin("data"); + trace_dump_bytes(tr_trans->map, size); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + tr_trans->map = NULL; + } + + context->transfer_unmap(context, transfer); +} + static const struct debug_named_value rbug_blocker_flags[] = { {"before", 1}, {"after", 2}, @@ -1367,6 +1499,11 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.is_texture_referenced = trace_is_texture_referenced; tr_ctx->base.is_buffer_referenced = trace_is_buffer_referenced; + tr_ctx->base.get_tex_transfer = trace_context_get_tex_transfer; + tr_ctx->base.tex_transfer_destroy = trace_context_tex_transfer_destroy; + tr_ctx->base.transfer_map = trace_context_transfer_map; + tr_ctx->base.transfer_unmap = trace_context_transfer_unmap; + tr_ctx->pipe = pipe; trace_screen_add_to_list(tr_scr, contexts, tr_ctx); diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index 906b3262e4..eaa47df406 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -30,9 +30,6 @@ #include "util/u_memory.h" #include "tr_drm.h" #include "tr_screen.h" -#include "tr_context.h" -#include "tr_buffer.h" -#include "tr_texture.h" #include "tr_public.h" struct trace_drm_api diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index a43adac694..f4f17566fd 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -219,7 +219,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, struct trace_texture *tr_tex = NULL; struct tr_list *ptr; - struct pipe_screen *screen = tr_scr->screen; + struct pipe_context *context = tr_scr->private_context; struct pipe_texture *tex; struct pipe_transfer *t; @@ -239,12 +239,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, } tex = tr_tex->texture; - t = screen->get_tex_transfer(tr_scr->screen, tex, - gptr->face, gptr->level, gptr->zslice, - PIPE_TRANSFER_READ, - gptr->x, gptr->y, gptr->w, gptr->h); + t = context->get_tex_transfer(context, tex, + gptr->face, gptr->level, gptr->zslice, + PIPE_TRANSFER_READ, + gptr->x, gptr->y, gptr->w, gptr->h); - map = screen->transfer_map(screen, t); + map = context->transfer_map(context, t); rbug_send_texture_read_reply(tr_rbug->con, serial, t->texture->format, @@ -256,8 +256,8 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, t->stride, NULL); - screen->transfer_unmap(screen, t); - screen->tex_transfer_destroy(t); + context->transfer_unmap(context, t); + context->tex_transfer_destroy(context, t); pipe_mutex_unlock(tr_scr->list_mutex); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 86ddb99540..25990bdac7 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -353,133 +353,7 @@ trace_screen_tex_surface_destroy(struct pipe_surface *_surface) } -/******************************************************************** - * transfer - */ - - -static struct pipe_transfer * -trace_screen_get_tex_transfer(struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned face, unsigned level, - unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_texture *tr_tex = trace_texture(_texture); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_texture *texture = tr_tex->texture; - struct pipe_transfer *result = NULL; - - assert(texture->screen == screen); - - trace_dump_call_begin("pipe_screen", "get_tex_transfer"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, texture); - trace_dump_arg(uint, face); - trace_dump_arg(uint, level); - trace_dump_arg(uint, zslice); - trace_dump_arg(uint, usage); - - trace_dump_arg(uint, x); - trace_dump_arg(uint, y); - trace_dump_arg(uint, w); - trace_dump_arg(uint, h); - - result = screen->get_tex_transfer(screen, texture, face, level, zslice, usage, - x, y, w, h); - - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - - if (result) - result = trace_transfer_create(tr_tex, result); - - return result; -} - - -static void -trace_screen_tex_transfer_destroy(struct pipe_transfer *_transfer) -{ - struct trace_screen *tr_scr = trace_screen(_transfer->texture->screen); - struct trace_transfer *tr_trans = trace_transfer(_transfer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_transfer *transfer = tr_trans->transfer; - - trace_dump_call_begin("pipe_screen", "tex_transfer_destroy"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, transfer); - - trace_dump_call_end(); - - trace_transfer_destroy(tr_trans); -} - - -static void * -trace_screen_transfer_map(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_transfer *tr_trans = trace_transfer(_transfer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_transfer *transfer = tr_trans->transfer; - void *map; - - map = screen->transfer_map(screen, transfer); - if(map) { - if(transfer->usage & PIPE_TRANSFER_WRITE) { - assert(!tr_trans->map); - tr_trans->map = map; - } - } - - return map; -} - - -static void -trace_screen_transfer_unmap(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_transfer *tr_trans = trace_transfer(_transfer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_transfer *transfer = tr_trans->transfer; - - if(tr_trans->map) { - size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; - - trace_dump_call_begin("pipe_screen", "transfer_write"); - - trace_dump_arg(ptr, screen); - - trace_dump_arg(ptr, transfer); - - trace_dump_arg_begin("stride"); - trace_dump_uint(transfer->stride); - trace_dump_arg_end(); - - trace_dump_arg_begin("data"); - trace_dump_bytes(tr_trans->map, size); - trace_dump_arg_end(); - trace_dump_arg_begin("size"); - trace_dump_uint(size); - trace_dump_arg_end(); - - trace_dump_call_end(); - - tr_trans->map = NULL; - } - - screen->transfer_unmap(screen, transfer); -} /******************************************************************** @@ -901,10 +775,6 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.texture_destroy = trace_screen_texture_destroy; tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; tr_scr->base.tex_surface_destroy = trace_screen_tex_surface_destroy; - tr_scr->base.get_tex_transfer = trace_screen_get_tex_transfer; - tr_scr->base.tex_transfer_destroy = trace_screen_tex_transfer_destroy; - tr_scr->base.transfer_map = trace_screen_transfer_map; - tr_scr->base.transfer_unmap = trace_screen_transfer_unmap; tr_scr->base.buffer_create = trace_screen_buffer_create; tr_scr->base.user_buffer_create = trace_screen_user_buffer_create; if (screen->buffer_map) @@ -920,7 +790,11 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.fence_signalled = trace_screen_fence_signalled; tr_scr->base.fence_finish = trace_screen_fence_finish; tr_scr->base.flush_frontbuffer = trace_screen_flush_frontbuffer; + tr_scr->screen = screen; + tr_scr->private_context = screen->context_create(screen, NULL); + if (tr_scr->private_context == NULL) + goto error3; trace_dump_ret(ptr, screen); trace_dump_call_end(); @@ -930,10 +804,8 @@ trace_screen_create(struct pipe_screen *screen) return &tr_scr->base; -#if 0 error3: FREE(tr_scr); -#endif error2: trace_dump_ret(ptr, screen); trace_dump_call_end(); diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index 597e2fc265..9bfbe72e2c 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -56,6 +56,7 @@ struct trace_screen struct pipe_screen base; struct pipe_screen *screen; + struct pipe_context *private_context; /* remote debugger */ struct trace_rbug *rbug; diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 5321d68ec0..d818e21bb8 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -31,6 +31,7 @@ #include "util/u_simple_list.h" #include "tr_screen.h" +#include "tr_context.h" #include "tr_texture.h" @@ -124,8 +125,9 @@ trace_surface_destroy(struct trace_surface *tr_surf) struct pipe_transfer * -trace_transfer_create(struct trace_texture *tr_tex, - struct pipe_transfer *transfer) +trace_transfer_create(struct trace_context *tr_ctx, + struct trace_texture *tr_tex, + struct pipe_transfer *transfer) { struct trace_screen *tr_scr = trace_screen(tr_tex->base.screen); struct trace_transfer *tr_trans; @@ -142,8 +144,9 @@ trace_transfer_create(struct trace_texture *tr_tex, memcpy(&tr_trans->base, transfer, sizeof(struct pipe_transfer)); tr_trans->base.texture = NULL; - pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base); tr_trans->transfer = transfer; + + pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base); assert(tr_trans->base.texture == &tr_tex->base); trace_screen_add_to_list(tr_scr, transfers, tr_trans); @@ -151,21 +154,23 @@ trace_transfer_create(struct trace_texture *tr_tex, return &tr_trans->base; error: - transfer->texture->screen->tex_transfer_destroy(transfer); + tr_ctx->pipe->tex_transfer_destroy(tr_ctx->pipe, transfer); return NULL; } void -trace_transfer_destroy(struct trace_transfer *tr_trans) +trace_transfer_destroy(struct trace_context *tr_context, + struct trace_transfer *tr_trans) { - struct trace_screen *tr_scr = trace_screen(tr_trans->base.texture->screen); - struct pipe_screen *screen = tr_trans->transfer->texture->screen; + struct trace_screen *tr_scr = trace_screen(tr_context->base.screen); + struct pipe_context *context = tr_context->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; trace_screen_remove_from_list(tr_scr, transfers, tr_trans); pipe_texture_reference(&tr_trans->base.texture, NULL); - screen->tex_transfer_destroy(tr_trans->transfer); + context->tex_transfer_destroy(context, transfer); FREE(tr_trans); } diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h index 395e523e73..4dc95308a7 100644 --- a/src/gallium/drivers/trace/tr_texture.h +++ b/src/gallium/drivers/trace/tr_texture.h @@ -34,6 +34,7 @@ #include "tr_screen.h" +struct trace_context; struct trace_texture { @@ -60,6 +61,7 @@ struct trace_transfer struct pipe_transfer base; struct pipe_transfer *transfer; + struct pipe_context *pipe; struct tr_list list; @@ -112,11 +114,13 @@ void trace_surface_destroy(struct trace_surface *tr_surf); struct pipe_transfer * -trace_transfer_create(struct trace_texture *tr_tex, - struct pipe_transfer *transfer); +trace_transfer_create(struct trace_context *tr_ctx, + struct trace_texture *tr_tex, + struct pipe_transfer *transfer); void -trace_transfer_destroy(struct trace_transfer *tr_trans); +trace_transfer_destroy(struct trace_context *tr_ctx, + struct trace_transfer *tr_trans); #endif /* TR_TEXTURE_H_ */ diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 376b01aa69..a7f12fb81e 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -306,6 +306,33 @@ struct pipe_context { */ unsigned int (*is_buffer_referenced)(struct pipe_context *pipe, struct pipe_buffer *buf); + + + + /** + * Get a transfer object for transferring data to/from a texture. + * + * Transfers are (by default) context-private and allow uploads to be + * interleaved with + */ + struct pipe_transfer *(*get_tex_transfer)(struct pipe_context *, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, + unsigned w, unsigned h); + + void (*tex_transfer_destroy)(struct pipe_context *, + struct pipe_transfer *); + + void *(*transfer_map)( struct pipe_context *, + struct pipe_transfer *transfer ); + + void (*transfer_unmap)( struct pipe_context *, + struct pipe_transfer *transfer ); + + }; diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index b771bfe85e..b7cb83abbe 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -142,23 +142,6 @@ struct pipe_screen { void (*tex_surface_destroy)(struct pipe_surface *); - /** Get a transfer object for transferring data to/from a texture */ - struct pipe_transfer *(*get_tex_transfer)(struct pipe_screen *, - struct pipe_texture *texture, - unsigned face, unsigned level, - unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, - unsigned w, unsigned h); - - void (*tex_transfer_destroy)(struct pipe_transfer *); - - void *(*transfer_map)( struct pipe_screen *, - struct pipe_transfer *transfer ); - - void (*transfer_unmap)( struct pipe_screen *, - struct pipe_transfer *transfer ); - /** * Create a new buffer. diff --git a/src/gallium/include/state_tracker/sw_winsys.h b/src/gallium/include/state_tracker/sw_winsys.h index 0de98bbc1c..9d202e48bf 100644 --- a/src/gallium/include/state_tracker/sw_winsys.h +++ b/src/gallium/include/state_tracker/sw_winsys.h @@ -44,6 +44,7 @@ extern "C" { #endif +struct winsys_handle; struct pipe_screen; struct pipe_context; @@ -68,6 +69,7 @@ struct sw_winsys boolean (*is_displaytarget_format_supported)( struct sw_winsys *ws, + unsigned tex_usage, enum pipe_format format ); /** @@ -83,12 +85,30 @@ struct sw_winsys */ struct sw_displaytarget * (*displaytarget_create)( struct sw_winsys *ws, + unsigned tex_usage, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, unsigned *stride ); /** + * Used to implement texture_from_handle. + */ + struct sw_displaytarget * + (*displaytarget_from_handle)( struct sw_winsys *ws, + const struct pipe_texture *templat, + struct winsys_handle *whandle, + unsigned *stride ); + + /** + * Used to implement texture_get_handle. + */ + boolean + (*displaytarget_get_handle)( struct sw_winsys *ws, + struct sw_displaytarget *dt, + struct winsys_handle *whandle ); + + /** * \param flags bitmask of PIPE_BUFFER_USAGE_x flags */ void * diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index 75ac2e8ada..f772ba5d16 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -166,7 +166,7 @@ dri_make_current(__DRIcontext * cPriv, * flush_frontbuffer directly (in front-buffer rendering), it * will have access to the drawable argument: */ - st_make_current(ctx->st, draw->stfb, read->stfb, NULL); + st_make_current(ctx->st, draw->stfb, read->stfb, ctx); if (__dri1_api_hooks) { dri1_update_drawables(ctx, draw, read); diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 60bc560049..7ccad8f5dd 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -90,6 +90,9 @@ dri_fill_in_modes(struct dri_screen *screen, unsigned pixel_bits) { __DRIconfig **configs = NULL; + __DRIconfig **configs_r5g6b5 = NULL; + __DRIconfig **configs_a8r8g8b8 = NULL; + __DRIconfig **configs_x8r8g8b8 = NULL; unsigned num_modes; uint8_t depth_bits_array[5]; uint8_t stencil_bits_array[5]; @@ -127,25 +130,23 @@ dri_fill_in_modes(struct dri_screen *screen, pf_x8r8g8b8 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B8G8R8X8_UNORM, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0); - - /* we support buffers with different depths only if we can tell the driver - * the actual depth of each of them. */ - if (screen->sPriv->dri2.loader - && (screen->sPriv->dri2.loader->base.version > 2) - && (screen->sPriv->dri2.loader->getBuffersWithFormat != NULL)) { + pf_r5g6b5 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0); + + /* We can only get a 16 or 32 bit depth buffer with getBuffersWithFormat */ + if (screen->sPriv->dri2.loader && + (screen->sPriv->dri2.loader->base.version > 2) && + (screen->sPriv->dri2.loader->getBuffersWithFormat != NULL)) { pf_z16 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z16_UNORM, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); pf_z32 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z32_UNORM, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0); - pf_r5g6b5 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B5G6R5_UNORM, - PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_RENDER_TARGET, 0); } else { pf_z16 = FALSE; pf_z32 = FALSE; - pf_r5g6b5 = FALSE; } if (pf_z16) { @@ -175,46 +176,48 @@ dri_fill_in_modes(struct dri_screen *screen, num_modes = depth_buffer_factor * back_buffer_factor * msaa_samples_factor * 4; - if (pixel_bits == 16 && pf_r5g6b5) { - configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, back_buffer_modes, - back_buffer_factor, - msaa_samples_array, msaa_samples_factor, - GL_TRUE); + if (pf_r5g6b5) + configs_r5g6b5 = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor, + msaa_samples_array, msaa_samples_factor, + GL_TRUE); + + if (pf_a8r8g8b8) + configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + msaa_samples_array, + msaa_samples_factor, + GL_TRUE); + + if (pf_x8r8g8b8) + configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, + depth_bits_array, + stencil_bits_array, + depth_buffer_factor, + back_buffer_modes, + back_buffer_factor, + msaa_samples_array, + msaa_samples_factor, + GL_TRUE); + + if (pixel_bits == 16) { + configs = configs_r5g6b5; + if (configs_a8r8g8b8) + configs = configs ? driConcatConfigs(configs, configs_a8r8g8b8) : configs_a8r8g8b8; + if (configs_x8r8g8b8) + configs = configs ? driConcatConfigs(configs, configs_x8r8g8b8) : configs_x8r8g8b8; } else { - __DRIconfig **configs_a8r8g8b8 = NULL; - __DRIconfig **configs_x8r8g8b8 = NULL; - - if (pf_a8r8g8b8) - configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - msaa_samples_array, - msaa_samples_factor, - GL_TRUE); - if (pf_x8r8g8b8) - configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV, - depth_bits_array, - stencil_bits_array, - depth_buffer_factor, - back_buffer_modes, - back_buffer_factor, - msaa_samples_array, - msaa_samples_factor, - GL_TRUE); - - if (configs_a8r8g8b8 && configs_x8r8g8b8) - configs = driConcatConfigs(configs_x8r8g8b8, configs_a8r8g8b8); - else if (configs_a8r8g8b8) - configs = configs_a8r8g8b8; - else if (configs_x8r8g8b8) - configs = configs_x8r8g8b8; - else - configs = NULL; + configs = configs_a8r8g8b8; + if (configs_x8r8g8b8) + configs = configs ? driConcatConfigs(configs, configs_x8r8g8b8) : configs_x8r8g8b8; + if (configs_r5g6b5) + configs = configs ? driConcatConfigs(configs, configs_r5g6b5) : configs_r5g6b5; } if (configs == NULL) { diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index e0d12acabe..c6b16354f9 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -82,6 +82,8 @@ struct ximage_surface { int width, height; struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS]; uint valid_mask; + + struct pipe_surface *draw_surface; }; struct ximage_config { @@ -266,15 +268,19 @@ ximage_surface_draw_buffer(struct native_surface *nsurf, assert(xsurf->drawable && xbuf->texture); - /* what's the cost of surface creation? */ - psurf = screen->get_tex_surface(screen, - xbuf->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ); - if (!psurf) - return FALSE; + psurf = xsurf->draw_surface; + if (!psurf || psurf->texture != xbuf->texture) { + pipe_surface_reference(&xsurf->draw_surface, NULL); - screen->flush_frontbuffer(screen, psurf, &xbuf->xdraw); + psurf = screen->get_tex_surface(screen, + xbuf->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ); + if (!psurf) + return FALSE; - pipe_surface_reference(&psurf, NULL); + xsurf->draw_surface = psurf; + } + + screen->flush_frontbuffer(screen, psurf, &xbuf->xdraw); return TRUE; } @@ -371,6 +377,8 @@ ximage_surface_destroy(struct native_surface *nsurf) struct ximage_surface *xsurf = ximage_surface(nsurf); int i; + pipe_surface_reference(&xsurf->draw_surface, NULL); + for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++) ximage_surface_free_buffer(&xsurf->base, i); diff --git a/src/gallium/state_trackers/glx/xlib/Makefile b/src/gallium/state_trackers/glx/xlib/Makefile index ddffdb162c..582e72bb46 100644 --- a/src/gallium/state_trackers/glx/xlib/Makefile +++ b/src/gallium/state_trackers/glx/xlib/Makefile @@ -5,7 +5,8 @@ LIBNAME = xlib LIBRARY_INCLUDES = \ -I$(TOP)/include \ - -I$(TOP)/src/mesa + -I$(TOP)/src/mesa \ + $(X_CFLAGS) C_SOURCES = \ glx_api.c \ diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript index 781f54bf2b..d0d141fd24 100644 --- a/src/gallium/state_trackers/python/SConscript +++ b/src/gallium/state_trackers/python/SConscript @@ -3,7 +3,8 @@ import os.path Import('*') -if 'python' in env['statetrackers']: +if 'python' in env['statetrackers'] and 0: + # FIXME: Disable python state tracker until transfers are done by contexts env = env.Clone() diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c index 02248ad433..18e2cc1f25 100644 --- a/src/gallium/state_trackers/vega/api_filters.c +++ b/src/gallium/state_trackers/vega/api_filters.c @@ -78,14 +78,14 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx, { /* upload color_data */ struct pipe_transfer *transfer = - screen->get_tex_transfer(screen, tex, - 0, 0, 0, - PIPE_TRANSFER_READ_WRITE , - 0, 0, tex->width0, tex->height0); - void *map = screen->transfer_map(screen, transfer); + pipe->get_tex_transfer(pipe, tex, + 0, 0, 0, + PIPE_TRANSFER_READ_WRITE , + 0, 0, tex->width0, tex->height0); + void *map = pipe->transfer_map(pipe, transfer); memcpy(map, color_data, sizeof(VGint)*color_data_len); - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } return tex; diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c index 015241498e..fec473d9d2 100644 --- a/src/gallium/state_trackers/vega/api_images.c +++ b/src/gallium/state_trackers/vega/api_images.c @@ -397,7 +397,6 @@ void vgReadPixels(void * data, VGint dataStride, { struct vg_context *ctx = vg_current_context(); struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; struct st_framebuffer *stfb = ctx->draw_buffer; struct st_renderbuffer *strb = stfb->strb; @@ -442,7 +441,7 @@ void vgReadPixels(void * data, VGint dataStride, { struct pipe_transfer *transfer; - transfer = screen->get_tex_transfer(screen, strb->texture, 0, 0, 0, + transfer = pipe->get_tex_transfer(pipe, strb->texture, 0, 0, 0, PIPE_TRANSFER_READ, 0, 0, width, height); @@ -451,14 +450,14 @@ void vgReadPixels(void * data, VGint dataStride, #if 0 debug_printf("%d-%d == %d\n", sy, height, y); #endif - pipe_get_tile_rgba(transfer, sx, y, width, 1, df); + pipe_get_tile_rgba(pipe, transfer, sx, y, width, 1, df); y += yStep; _vega_pack_rgba_span_float(ctx, width, temp, dataFormat, dst + yoffset + xoffset); dst += dataStride; } - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); } } diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c index 41c979bfec..a71579cd26 100644 --- a/src/gallium/state_trackers/vega/image.c +++ b/src/gallium/state_trackers/vega/image.c @@ -378,7 +378,7 @@ void image_sub_data(struct vg_image *image, VGfloat *df = (VGfloat*)temp; VGint i; struct vg_context *ctx = vg_current_context(); - struct pipe_screen *screen = ctx->pipe->screen; + struct pipe_context *pipe = ctx->pipe; struct pipe_texture *texture = image_texture(image); VGint xoffset = 0, yoffset = 0; @@ -412,17 +412,17 @@ void image_sub_data(struct vg_image *image, } { /* upload color_data */ - struct pipe_transfer *transfer = screen->get_tex_transfer( - screen, texture, 0, 0, 0, + struct pipe_transfer *transfer = pipe->get_tex_transfer( + pipe, texture, 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0); src += (dataStride * yoffset); for (i = 0; i < height; i++) { _vega_unpack_float_span_rgba(ctx, width, xoffset, src, dataFormat, temp); - pipe_put_tile_rgba(transfer, x+image->x, y+image->y, width, 1, df); + pipe_put_tile_rgba(pipe, transfer, x+image->x, y+image->y, width, 1, df); y += yStep; src += dataStride; } - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); } } @@ -435,7 +435,6 @@ void image_get_sub_data(struct vg_image * image, { struct vg_context *ctx = vg_current_context(); struct pipe_context *pipe = ctx->pipe; - struct pipe_screen *screen = pipe->screen; VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4]; VGfloat *df = (VGfloat*)temp; VGint y = 0, yStep = 1; @@ -444,7 +443,7 @@ void image_get_sub_data(struct vg_image * image, { struct pipe_transfer *transfer = - screen->get_tex_transfer(screen, + pipe->get_tex_transfer(pipe, image->texture, 0, 0, 0, PIPE_TRANSFER_READ, 0, 0, @@ -455,13 +454,13 @@ void image_get_sub_data(struct vg_image * image, #if 0 debug_printf("%d-%d == %d\n", sy, height, y); #endif - pipe_get_tile_rgba(transfer, sx+image->x, y, width, 1, df); + pipe_get_tile_rgba(pipe, transfer, sx+image->x, y, width, 1, df); y += yStep; _vega_pack_rgba_span_float(ctx, width, temp, dataFormat, dst); dst += dataStride; } - screen->tex_transfer_destroy(transfer); + pipe->tex_transfer_destroy(pipe, transfer); } } diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c index cdb87d3bf6..dc56b8c5f3 100644 --- a/src/gallium/state_trackers/vega/paint.c +++ b/src/gallium/state_trackers/vega/paint.c @@ -164,10 +164,10 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p) struct pipe_transfer *transfer = st_no_flush_get_tex_transfer(p->base.ctx, tex, 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, 1024, 1); - void *map = screen->transfer_map(screen, transfer); + void *map = pipe->transfer_map(pipe, transfer); memcpy(map, p->gradient.color_data, sizeof(VGint)*1024); - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } return tex; diff --git a/src/gallium/state_trackers/vega/st_inlines.h b/src/gallium/state_trackers/vega/st_inlines.h index 419151c3ae..4d12a4efdd 100644 --- a/src/gallium/state_trackers/vega/st_inlines.h +++ b/src/gallium/state_trackers/vega/st_inlines.h @@ -51,7 +51,6 @@ st_cond_flush_get_tex_transfer(struct vg_context *st, unsigned int x, unsigned int y, unsigned int w, unsigned int h) { - struct pipe_screen *screen = st->pipe->screen; struct pipe_context *pipe = st->pipe; unsigned referenced = pipe->is_texture_referenced(pipe, pt, face, level); @@ -60,7 +59,7 @@ st_cond_flush_get_tex_transfer(struct vg_context *st, (usage & PIPE_TRANSFER_WRITE))) vgFlush(); - return screen->get_tex_transfer(screen, pt, face, level, zslice, usage, + return pipe->get_tex_transfer(pipe, pt, face, level, zslice, usage, x, y, w, h); } @@ -74,10 +73,10 @@ st_no_flush_get_tex_transfer(struct vg_context *st, unsigned int x, unsigned int y, unsigned int w, unsigned int h) { - struct pipe_screen *screen = st->pipe->screen; + struct pipe_context *pipe = st->pipe; - return screen->get_tex_transfer(screen, pt, face, level, - zslice, usage, x, y, w, h); + return pipe->get_tex_transfer(pipe, pt, face, level, + zslice, usage, x, y, w, h); } static INLINE void * diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h index 08cc4973bc..e61e9bf9c2 100644 --- a/src/gallium/state_trackers/wgl/stw_framebuffer.h +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h @@ -45,7 +45,7 @@ struct stw_framebuffer /** * This mutex has two purposes: * - protect the access to the mutable data members below - * - prevent the the framebuffer from being deleted while being accessed. + * - prevent the framebuffer from being deleted while being accessed. * * It is OK to lock this mutex while holding the stw_device::fb_mutex lock, * but the opposite must never happen. diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c index c50873c150..715a5e7b94 100644 --- a/src/gallium/state_trackers/xorg/xorg_composite.c +++ b/src/gallium/state_trackers/xorg/xorg_composite.c @@ -359,14 +359,6 @@ bind_samplers(struct exa_context *exa, int op, exa->num_bound_samplers = 0; -#if 0 - if ((pSrc && (exa->pipe->is_texture_referenced(exa->pipe, pSrc->tex, 0, 0) & - PIPE_REFERENCED_FOR_WRITE)) || - (pMask && (exa->pipe->is_texture_referenced(exa->pipe, pMask->tex, 0, 0) & - PIPE_REFERENCED_FOR_WRITE))) - xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL); -#endif - memset(&src_sampler, 0, sizeof(struct pipe_sampler_state)); memset(&mask_sampler, 0, sizeof(struct pipe_sampler_state)); diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c index f60e9abb76..eef428232b 100644 --- a/src/gallium/state_trackers/xorg/xorg_crtc.c +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c @@ -220,16 +220,16 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image) crtcp->cursor_handle = whandle.handle; } - transfer = ms->screen->get_tex_transfer(ms->screen, crtcp->cursor_tex, - 0, 0, 0, - PIPE_TRANSFER_WRITE, - 0, 0, 64, 64); - ptr = ms->screen->transfer_map(ms->screen, transfer); + transfer = ms->ctx->get_tex_transfer(ms->ctx, crtcp->cursor_tex, + 0, 0, 0, + PIPE_TRANSFER_WRITE, + 0, 0, 64, 64); + ptr = ms->ctx->transfer_map(ms->ctx, transfer); util_copy_rect(ptr, crtcp->cursor_tex->format, transfer->stride, 0, 0, 64, 64, (void*)image, 64 * 4, 0, 0); - ms->screen->transfer_unmap(ms->screen, transfer); - ms->screen->tex_transfer_destroy(transfer); + ms->ctx->transfer_unmap(ms->ctx, transfer); + ms->ctx->tex_transfer_destroy(ms->ctx, transfer); } #if HAVE_LIBKMS diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index a7ffe3f499..bdec0e254f 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -188,11 +188,7 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, if (!priv || !priv->tex) return FALSE; - if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & - PIPE_REFERENCED_FOR_WRITE) - exa->pipe->flush(exa->pipe, 0, NULL); - - transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, + transfer = exa->pipe->get_tex_transfer(exa->pipe, priv->tex, 0, 0, 0, PIPE_TRANSFER_READ, x, y, w, h); if (!transfer) return FALSE; @@ -203,11 +199,11 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x, int y, int w, int h, char *dst, #endif util_copy_rect((unsigned char*)dst, priv->tex->format, dst_pitch, 0, 0, - w, h, exa->scrn->transfer_map(exa->scrn, transfer), + w, h, exa->pipe->transfer_map(exa->pipe, transfer), transfer->stride, 0, 0); - exa->scrn->transfer_unmap(exa->scrn, transfer); - exa->scrn->tex_transfer_destroy(transfer); + exa->pipe->transfer_unmap(exa->pipe, transfer); + exa->pipe->tex_transfer_destroy(exa->pipe, transfer); return TRUE; } @@ -226,12 +222,7 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src, if (!priv || !priv->tex) return FALSE; - /* make sure that any pending operations are flushed to hardware */ - if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & - (PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE)) - xorg_exa_flush(exa, 0, NULL); - - transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, + transfer = exa->pipe->get_tex_transfer(exa->pipe, priv->tex, 0, 0, 0, PIPE_TRANSFER_WRITE, x, y, w, h); if (!transfer) return FALSE; @@ -241,12 +232,12 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src, x, y, w, h, src_pitch); #endif - util_copy_rect(exa->scrn->transfer_map(exa->scrn, transfer), + util_copy_rect(exa->pipe->transfer_map(exa->pipe, transfer), priv->tex->format, transfer->stride, 0, 0, w, h, (unsigned char*)src, src_pitch, 0, 0); - exa->scrn->transfer_unmap(exa->scrn, transfer); - exa->scrn->tex_transfer_destroy(transfer); + exa->pipe->transfer_unmap(exa->pipe, transfer); + exa->pipe->tex_transfer_destroy(exa->pipe, transfer); return TRUE; } @@ -270,15 +261,11 @@ ExaPrepareAccess(PixmapPtr pPix, int index) if (priv->map_count == 0) { - if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) & - PIPE_REFERENCED_FOR_WRITE) - exa->pipe->flush(exa->pipe, 0, NULL); - assert(pPix->drawable.width <= priv->tex->width0); assert(pPix->drawable.height <= priv->tex->height0); priv->map_transfer = - exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0, + exa->pipe->get_tex_transfer(exa->pipe, priv->tex, 0, 0, 0, #ifdef EXA_MIXED_PIXMAPS PIPE_TRANSFER_MAP_DIRECTLY | #endif @@ -294,7 +281,7 @@ ExaPrepareAccess(PixmapPtr pPix, int index) #endif pPix->devPrivate.ptr = - exa->scrn->transfer_map(exa->scrn, priv->map_transfer); + exa->pipe->transfer_map(exa->pipe, priv->map_transfer); pPix->devKind = priv->map_transfer->stride; } @@ -321,8 +308,8 @@ ExaFinishAccess(PixmapPtr pPix, int index) if (--priv->map_count == 0) { assert(priv->map_transfer); - exa->scrn->transfer_unmap(exa->scrn, priv->map_transfer); - exa->scrn->tex_transfer_destroy(priv->map_transfer); + exa->pipe->transfer_unmap(exa->pipe, priv->map_transfer); + exa->pipe->tex_transfer_destroy(exa->pipe, priv->map_transfer); priv->map_transfer = NULL; pPix->devPrivate.ptr = NULL; } diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c index e37a1c3959..5a195cb482 100644 --- a/src/gallium/state_trackers/xorg/xorg_xv.c +++ b/src/gallium/state_trackers/xorg/xorg_xv.c @@ -275,28 +275,28 @@ copy_packed_data(ScrnInfoPtr pScrn, int i, j; struct pipe_texture **dst = port->yuv[port->current_set]; struct pipe_transfer *ytrans, *utrans, *vtrans; - struct pipe_screen *screen = port->r->pipe->screen; + struct pipe_context *pipe = port->r->pipe; char *ymap, *vmap, *umap; unsigned char y1, y2, u, v; int yidx, uidx, vidx; int y_array_size = w * h; - ytrans = screen->get_tex_transfer(screen, dst[0], - 0, 0, 0, - PIPE_TRANSFER_WRITE, - left, top, w, h); - utrans = screen->get_tex_transfer(screen, dst[1], - 0, 0, 0, - PIPE_TRANSFER_WRITE, - left, top, w, h); - vtrans = screen->get_tex_transfer(screen, dst[2], - 0, 0, 0, - PIPE_TRANSFER_WRITE, - left, top, w, h); - - ymap = (char*)screen->transfer_map(screen, ytrans); - umap = (char*)screen->transfer_map(screen, utrans); - vmap = (char*)screen->transfer_map(screen, vtrans); + ytrans = pipe->get_tex_transfer(pipe, dst[0], + 0, 0, 0, + PIPE_TRANSFER_WRITE, + left, top, w, h); + utrans = pipe->get_tex_transfer(pipe, dst[1], + 0, 0, 0, + PIPE_TRANSFER_WRITE, + left, top, w, h); + vtrans = pipe->get_tex_transfer(pipe, dst[2], + 0, 0, 0, + PIPE_TRANSFER_WRITE, + left, top, w, h); + + ymap = (char*)pipe->transfer_map(pipe, ytrans); + umap = (char*)pipe->transfer_map(pipe, utrans); + vmap = (char*)pipe->transfer_map(pipe, vtrans); yidx = uidx = vidx = 0; @@ -362,12 +362,12 @@ copy_packed_data(ScrnInfoPtr pScrn, break; } - screen->transfer_unmap(screen, ytrans); - screen->transfer_unmap(screen, utrans); - screen->transfer_unmap(screen, vtrans); - screen->tex_transfer_destroy(ytrans); - screen->tex_transfer_destroy(utrans); - screen->tex_transfer_destroy(vtrans); + pipe->transfer_unmap(pipe, ytrans); + pipe->transfer_unmap(pipe, utrans); + pipe->transfer_unmap(pipe, vtrans); + pipe->tex_transfer_destroy(pipe, ytrans); + pipe->tex_transfer_destroy(pipe, utrans); + pipe->tex_transfer_destroy(pipe, vtrans); } diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c index 1e6769367f..eff4b4778b 100644 --- a/src/gallium/targets/libgl-xlib/xlib.c +++ b/src/gallium/targets/libgl-xlib/xlib.c @@ -87,8 +87,10 @@ swrast_xlib_create_screen( Display *display ) screen = llvmpipe_create_screen( winsys ); #endif +#if defined(GALLIUM_SOFTPIPE) if (screen == NULL) screen = softpipe_create_screen( winsys ); +#endif if (screen == NULL) goto fail; diff --git a/src/gallium/winsys/drm/Makefile b/src/gallium/winsys/drm/Makefile index fee0191643..a998aff931 100644 --- a/src/gallium/winsys/drm/Makefile +++ b/src/gallium/winsys/drm/Makefile @@ -2,7 +2,7 @@ TOP = ../../../.. include $(TOP)/configs/current -SUBDIRS = $(GALLIUM_WINSYS_DRM_DIRS) +SUBDIRS = sw $(GALLIUM_WINSYS_DRM_DIRS) default install clean: @for dir in $(SUBDIRS) ; do \ diff --git a/src/gallium/winsys/drm/i965/dri/Makefile b/src/gallium/winsys/drm/i965/dri/Makefile index f7e81eed87..56690769fc 100644 --- a/src/gallium/winsys/drm/i965/dri/Makefile +++ b/src/gallium/winsys/drm/i965/dri/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/i965/gem/libi965drm.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/winsys/drm/sw/libswdrm.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(TOP)/src/gallium/drivers/identity/libidentity.a \ $(TOP)/src/gallium/drivers/i965/libi965.a diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c index 21e82303f7..2ebc05f47e 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -10,6 +10,8 @@ #include "trace/tr_drm.h" +#include "../../sw/sw_drm_api.h" + /* * Helper functions */ @@ -108,5 +110,13 @@ struct drm_api i965_libdrm_api = struct drm_api * drm_api_create() { - return trace_drm_create(&i965_libdrm_api); + struct drm_api *api; + + if (api == NULL && debug_get_bool_option("BRW_SOFTPIPE", FALSE)) + api = sw_drm_api_create(&i965_libdrm_api); + + if (api == NULL) + api = &i965_libdrm_api; + + return trace_drm_create(api); } diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile index 7e95f79d03..50ac3f203e 100644 --- a/src/gallium/winsys/drm/nouveau/dri/Makefile +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -6,8 +6,7 @@ LIBNAME = nouveau_dri.so PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 21517b4bb5..716d4bacd3 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -86,11 +86,9 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, switch (dev->chipset & 0xf0) { case 0x30: - init = nv30_screen_create; - break; case 0x40: case 0x60: - init = nv40_screen_create; + init = nvfx_screen_create; break; case 0x50: case 0x80: diff --git a/src/gallium/winsys/drm/nouveau/egl/Makefile b/src/gallium/winsys/drm/nouveau/egl/Makefile index 2c35260332..47d1127615 100644 --- a/src/gallium/winsys/drm/nouveau/egl/Makefile +++ b/src/gallium/winsys/drm/nouveau/egl/Makefile @@ -7,8 +7,7 @@ EGL_DRIVER_LIBS = -ldrm_nouveau EGL_DRIVER_PIPES = \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a diff --git a/src/gallium/winsys/drm/nouveau/xorg/Makefile b/src/gallium/winsys/drm/nouveau/xorg/Makefile index 179b50230b..f7f6fe17dd 100644 --- a/src/gallium/winsys/drm/nouveau/xorg/Makefile +++ b/src/gallium/winsys/drm/nouveau/xorg/Makefile @@ -18,8 +18,7 @@ INCLUDES = \ LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ $(GALLIUM_AUXILIARIES) diff --git a/src/gallium/winsys/drm/radeon/core/Makefile b/src/gallium/winsys/drm/radeon/core/Makefile index 860cbb6dbf..13bbbf730d 100644 --- a/src/gallium/winsys/drm/radeon/core/Makefile +++ b/src/gallium/winsys/drm/radeon/core/Makefile @@ -5,7 +5,7 @@ include $(TOP)/configs/current LIBNAME = radeonwinsys C_SOURCES = \ - radeon_buffer.c \ + radeon_drm_buffer.c \ radeon_drm.c \ radeon_r300.c diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c index daa032af6f..25b58b2926 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c @@ -213,6 +213,18 @@ static void radeon_buffer_unmap(struct pipe_winsys *ws, } } +static boolean radeon_is_buffer_referenced(struct radeon_winsys *ws, + struct pipe_buffer *buffer) +{ + struct radeon_pipe_buffer *radeon_buffer = + (struct radeon_pipe_buffer*)buffer; + uint32_t domain; + + /* Referenced by CS or HW. */ + return radeon_bo_is_referenced_by_cs(radeon_buffer->bo, ws->priv->cs) || + radeon_bo_is_busy(radeon_buffer->bo, &domain); +} + static void radeon_buffer_set_tiling(struct radeon_winsys *ws, struct pipe_buffer *buffer, uint32_t pitch, @@ -370,5 +382,7 @@ struct radeon_winsys* radeon_pipe_winsys(int fd) radeon_ws->buffer_from_handle = radeon_buffer_from_handle; radeon_ws->buffer_get_handle = radeon_buffer_get_handle; + radeon_ws->is_buffer_referenced = radeon_is_buffer_referenced; + return radeon_ws; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h index f776e2d900..e1fcfcfcca 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h @@ -30,48 +30,56 @@ #ifndef RADEON_BUFFER_H #define RADEON_BUFFER_H +#include <stdio.h> + +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + #include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_bufmgr.h" #include "radeon_bo.h" #include "radeon_cs.h" #include "radeon_winsys.h" -struct radeon_pipe_buffer { - struct pipe_buffer base; - /* Pointer to GPU-backed BO. */ - struct radeon_bo *bo; - /* Pointer to fallback PB buffer. */ - struct pb_buffer *pb; - boolean flinked; - uint32_t flink; -}; #define RADEON_MAX_BOS 24 -struct radeon_winsys_priv { - /* DRM FD */ - int fd; +static INLINE struct pb_buffer * +radeon_pb_buffer(struct r300_winsys_buffer *buffer) +{ + return (struct pb_buffer *)buffer; +} - /* Radeon BO manager. */ - struct radeon_bo_manager* bom; +static INLINE struct r300_winsys_buffer * +radeon_libdrm_winsys_buffer(struct pb_buffer *buffer) +{ + return (struct r300_winsys_buffer *)buffer; +} - /* Radeon CS manager. */ - struct radeon_cs_manager* csm; +struct pb_manager * +radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws); - /* Current CS. */ - struct radeon_cs* cs; +boolean radeon_drm_bufmgr_add_buffer(struct pb_buffer *_buf, + uint32_t rd, uint32_t wd); - /* Flush CB */ - void (*flush_cb)(void *); - void *flush_data; -}; -struct radeon_winsys* radeon_pipe_winsys(int fb); -#if 0 -struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context, - uint32_t handle, - enum pipe_format format, - int w, int h, int pitch); -#endif +void radeon_drm_bufmgr_write_reloc(struct pb_buffer *_buf, + uint32_t rd, uint32_t wd, + uint32_t flags); + +struct radeon_libdrm_winsys* radeon_pipe_winsys(int fd); + +struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, + uint32_t handle); + +void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, boolean microtiled, boolean macrotiled, uint32_t pitch); + +void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr); + +boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, + struct winsys_handle *whandle); + +boolean radeon_drm_bufmgr_is_buffer_referenced(struct pb_buffer *_buf); #endif diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c index 97edb6a47e..d70173e805 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c @@ -41,8 +41,22 @@ #include "xf86drm.h" #include <sys/ioctl.h> +static struct radeon_libdrm_winsys * +radeon_winsys_create(int fd) +{ + struct radeon_libdrm_winsys *rws; + + rws = CALLOC_STRUCT(radeon_libdrm_winsys); + if (rws == NULL) { + return NULL; + } + + rws->fd = fd; + return rws; +} + /* Helper function to do the ioctls needed for setup and init. */ -static void do_ioctls(int fd, struct radeon_winsys* winsys) +static void do_ioctls(int fd, struct radeon_libdrm_winsys* winsys) { struct drm_radeon_gem_info gem_info = {0}; struct drm_radeon_info info = {0}; @@ -133,19 +147,28 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB, struct drm_create_screen_arg *arg) { - struct radeon_winsys* rwinsys = radeon_pipe_winsys(drmFB); - do_ioctls(drmFB, rwinsys); + struct radeon_libdrm_winsys* rws; + boolean ret; + + rws = radeon_winsys_create(drmFB); + if (!rws) + return NULL; + + do_ioctls(drmFB, rws); /* The state tracker can organize a softpipe fallback if no hw * driver is found. */ - if (is_r3xx(rwinsys->pci_id)) { - radeon_setup_winsys(drmFB, rwinsys); - return r300_create_screen(rwinsys); - } else { - FREE(rwinsys); - return NULL; + if (is_r3xx(rws->pci_id)) { + ret = radeon_setup_winsys(drmFB, rws); + if (ret == FALSE) + goto fail; + return r300_create_screen(&rws->base); } + +fail: + FREE(rws); + return NULL; } static void radeon_drm_api_destroy(struct drm_api *api) diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.h b/src/gallium/winsys/drm/radeon/core/radeon_drm.h index 78451b6f01..2dc077c052 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_drm.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.h @@ -37,6 +37,22 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api, int drmFB, struct drm_create_screen_arg *arg); +boolean radeon_buffer_from_texture(struct drm_api* api, + struct pipe_screen* screen, + struct pipe_texture* texture, + struct pipe_buffer** buffer, + unsigned* stride); + +boolean radeon_handle_from_buffer(struct drm_api* api, + struct pipe_screen* screen, + struct pipe_buffer* buffer, + unsigned* handle); + +boolean radeon_global_handle_from_buffer(struct drm_api* api, + struct pipe_screen* screen, + struct pipe_buffer* buffer, + unsigned* handle); + void radeon_destroy_drm_api(struct drm_api* api); /* Guess at whether this chipset should use r300g. diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_drm_buffer.c new file mode 100644 index 0000000000..cc56a2bb8f --- /dev/null +++ b/src/gallium/winsys/drm/radeon/core/radeon_drm_buffer.c @@ -0,0 +1,368 @@ + +#include <sys/ioctl.h> +#include "radeon_drm.h" +#include "radeon_bo_gem.h" +#include "radeon_cs_gem.h" +#include "radeon_buffer.h" + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "pipebuffer/pb_buffer.h" +#include "pipebuffer/pb_bufmgr.h" + +#include "radeon_winsys.h" +struct radeon_drm_bufmgr; + +struct radeon_drm_buffer { + struct pb_buffer base; + struct radeon_drm_bufmgr *mgr; + + struct radeon_bo *bo; + + boolean flinked; + uint32_t flink; + + boolean mapped; + struct radeon_drm_buffer *next, *prev; +}; + +extern const struct pb_vtbl radeon_drm_buffer_vtbl; + + +static INLINE struct radeon_drm_buffer * +radeon_drm_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &radeon_drm_buffer_vtbl); + return (struct radeon_drm_buffer *)buf; +} + +struct radeon_drm_bufmgr { + struct pb_manager base; + struct radeon_libdrm_winsys *rws; + struct radeon_drm_buffer buffer_map_list; +}; + +static INLINE struct radeon_drm_bufmgr * +radeon_drm_bufmgr(struct pb_manager *mgr) +{ + assert(mgr); + return (struct radeon_drm_bufmgr *)mgr; +} + +static void +radeon_drm_buffer_destroy(struct pb_buffer *_buf) +{ + struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); + + if (buf->mapped) { + remove_from_list(buf); + radeon_bo_unmap(buf->bo); + buf->mapped = false; + } + radeon_bo_unref(buf->bo); + + FREE(buf); +} + +static void * +radeon_drm_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct radeon_drm_buffer *buf = radeon_drm_buffer(_buf); + int write; + + if (buf->mapped) + return buf->bo->ptr; + + if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) { + uint32_t domain; + + if (radeon_bo_is_busy(buf->bo, &domain)) + return NULL; + } + + + if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) { + buf->mgr->rws->flush_cb(buf->mgr->rws->flush_data); + } + + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) { + write = 1; + } + + if (radeon_bo_map(buf->bo, write)) { + return NULL; + } + buf->mapped = true; + insert_at_tail(&buf->mgr->buffer_map_list, buf); + return buf->bo->ptr; +} + +static void +radeon_drm_buffer_unmap(struct pb_buffer *_buf) +{ + (void)_buf; +} + +static void +radeon_drm_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +static enum pipe_error +radeon_drm_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + /* Always pinned */ + return PIPE_OK; +} + +static void +radeon_drm_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ +} + +const struct pb_vtbl radeon_drm_buffer_vtbl = { + radeon_drm_buffer_destroy, + radeon_drm_buffer_map, + radeon_drm_buffer_unmap, + radeon_drm_buffer_validate, + radeon_drm_buffer_fence, + radeon_drm_buffer_get_base_buffer, +}; + + +static uint32_t radeon_domain_from_usage(unsigned usage) +{ + uint32_t domain = 0; + + if (usage & PIPE_BUFFER_USAGE_GPU_WRITE) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + domain |= RADEON_GEM_DOMAIN_VRAM; + } + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + if (usage & PIPE_BUFFER_USAGE_INDEX) { + domain |= RADEON_GEM_DOMAIN_GTT; + } + + return domain; +} + +struct pb_buffer *radeon_drm_bufmgr_create_buffer_from_handle(struct pb_manager *_mgr, + uint32_t handle) +{ + struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); + struct radeon_libdrm_winsys *rws = mgr->rws; + struct radeon_drm_buffer *buf; + struct radeon_bo *bo; + + bo = radeon_bo_open(rws->bom, handle, 0, + 0, 0, 0); + if (bo == NULL) + return NULL; + + buf = CALLOC_STRUCT(radeon_drm_buffer); + if (!buf) { + radeon_bo_unref(bo); + return NULL; + } + + make_empty_list(buf); + + pipe_reference_init(&buf->base.base.reference, 1); + buf->base.base.alignment = 0; + buf->base.base.usage = PIPE_BUFFER_USAGE_PIXEL; + buf->base.base.size = 0; + buf->base.vtbl = &radeon_drm_buffer_vtbl; + buf->mgr = mgr; + + buf->bo = bo; + + return &buf->base; +} + +static struct pb_buffer * +radeon_drm_bufmgr_create_buffer(struct pb_manager *_mgr, + pb_size size, + const struct pb_desc *desc) +{ + struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); + struct radeon_libdrm_winsys *rws = mgr->rws; + struct radeon_drm_buffer *buf; + uint32_t domain; + + buf = CALLOC_STRUCT(radeon_drm_buffer); + if (!buf) + goto error1; + + pipe_reference_init(&buf->base.base.reference, 1); + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &radeon_drm_buffer_vtbl; + buf->mgr = mgr; + + make_empty_list(buf); + domain = radeon_domain_from_usage(desc->usage); + buf->bo = radeon_bo_open(rws->bom, 0, size, + desc->alignment, domain, 0); + if (buf->bo == NULL) + goto error2; + + return &buf->base; + + error2: + FREE(buf); + error1: + return NULL; +} + +static void +radeon_drm_bufmgr_flush(struct pb_manager *mgr) +{ + /* NOP */ +} + +static void +radeon_drm_bufmgr_destroy(struct pb_manager *_mgr) +{ + struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); + FREE(mgr); +} + +struct pb_manager * +radeon_drm_bufmgr_create(struct radeon_libdrm_winsys *rws) +{ + struct radeon_drm_bufmgr *mgr; + + mgr = CALLOC_STRUCT(radeon_drm_bufmgr); + if (!mgr) + return NULL; + + mgr->base.destroy = radeon_drm_bufmgr_destroy; + mgr->base.create_buffer = radeon_drm_bufmgr_create_buffer; + mgr->base.flush = radeon_drm_bufmgr_flush; + + mgr->rws = rws; + make_empty_list(&mgr->buffer_map_list); + return &mgr->base; +} + +static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) +{ + struct radeon_drm_buffer *buf; + if (_buf->vtbl == &radeon_drm_buffer_vtbl) { + buf = radeon_drm_buffer(_buf); + } else { + struct pb_buffer *base_buf; + pb_size offset; + pb_get_base_buffer(_buf, &base_buf, &offset); + + buf = radeon_drm_buffer(base_buf); + } + return buf; +} + +boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, + struct winsys_handle *whandle) +{ + int retval, fd; + struct drm_gem_flink flink; + struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (!buf->flinked) { + fd = buf->mgr->rws->fd; + flink.handle = buf->bo->handle; + + retval = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink); + if (retval) { + return false; + } + + buf->flinked = TRUE; + buf->flink = flink.name; + } + whandle->handle = buf->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = buf->bo->handle; + } + return TRUE; +} + + +void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, boolean microtiled, boolean macrotiled, uint32_t pitch) +{ + struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + uint32_t flags = 0; + + if (microtiled) + flags |= RADEON_BO_FLAGS_MICRO_TILE; + if (macrotiled) + flags |= RADEON_BO_FLAGS_MACRO_TILE; + + radeon_bo_set_tiling(buf->bo, flags, pitch); + +} + +boolean radeon_drm_bufmgr_add_buffer(struct pb_buffer *_buf, + uint32_t rd, uint32_t wd) +{ + struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + radeon_cs_space_add_persistent_bo(buf->mgr->rws->cs, buf->bo, + rd, wd); + return true; +} + +void radeon_drm_bufmgr_write_reloc(struct pb_buffer *_buf, + uint32_t rd, uint32_t wd, + uint32_t flags) +{ + struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + int retval; + + retval = radeon_cs_write_reloc(buf->mgr->rws->cs, + buf->bo, rd, wd, flags); + if (retval) { + debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", + buf, rd, wd, flags); + } +} + +boolean radeon_drm_bufmgr_is_buffer_referenced(struct pb_buffer *_buf) +{ + struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + uint32_t domain; + + return (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs) || + radeon_bo_is_busy(buf->bo, &domain)); +} + + +void radeon_drm_bufmgr_flush_maps(struct pb_manager *_mgr) +{ + struct radeon_drm_bufmgr *mgr = radeon_drm_bufmgr(_mgr); + struct radeon_drm_buffer *rpb, *t_rpb; + + foreach_s(rpb, t_rpb, &mgr->buffer_map_list) { + rpb->mapped = 0; + radeon_bo_unmap(rpb->bo); + remove_from_list(rpb); + } + + make_empty_list(&mgr->buffer_map_list); + + +} diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c index 122bd21354..5b82a776a8 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c @@ -23,31 +23,139 @@ #include "radeon_r300.h" #include "radeon_buffer.h" +#include "radeon_bo_gem.h" #include "radeon_cs_gem.h" +#include "state_tracker/drm_api.h" -static void radeon_set_flush_cb(struct radeon_winsys *winsys, +static struct r300_winsys_buffer * +radeon_r300_winsys_buffer_create(struct r300_winsys_screen *rws, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct pb_desc desc; + struct pb_manager *provider; + struct pb_buffer *buffer; + + memset(&desc, 0, sizeof(desc)); + desc.alignment = alignment; + desc.usage = usage; + + if (usage & PIPE_BUFFER_USAGE_CONSTANT) + provider = ws->mman; + else + provider = ws->kman; + buffer = provider->create_buffer(provider, size, &desc); + if (!buffer) + return NULL; + + return radeon_libdrm_winsys_buffer(buffer); +} + +static void radeon_r300_winsys_buffer_destroy(struct r300_winsys_buffer *buf) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + + pb_destroy(_buf); +} +static void radeon_r300_winsys_buffer_set_tiling(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf, + uint32_t pitch, + boolean microtiled, + boolean macrotiled) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + radeon_drm_bufmgr_set_tiling(_buf, microtiled, macrotiled, pitch); +} + +static void *radeon_r300_winsys_buffer_map(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + unsigned usage) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + + return pb_map(_buf, usage); +} + +static void radeon_r300_winsys_buffer_unmap(struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + + pb_unmap(_buf); +} + +static void radeon_r300_winsys_buffer_reference(struct r300_winsys_screen *rws, + struct r300_winsys_buffer **pdst, + struct r300_winsys_buffer *src) +{ + struct pb_buffer *_src = radeon_pb_buffer(src); + struct pb_buffer *_dst = radeon_pb_buffer(*pdst); + + pb_reference(&_dst, _src); + + *pdst = radeon_libdrm_winsys_buffer(_dst); +} + +static boolean radeon_r300_winsys_is_buffer_referenced(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buf); + + return radeon_drm_bufmgr_is_buffer_referenced(_buf); +} + +static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, + struct pipe_screen *screen, + struct winsys_handle *whandle, + unsigned *stride) +{ + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct pb_buffer *_buf; + + _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); + *stride = whandle->stride; + return radeon_libdrm_winsys_buffer(_buf); +} + +static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buffer, + unsigned stride, + struct winsys_handle *whandle) +{ + struct pb_buffer *_buf = radeon_pb_buffer(buffer); + boolean ret; + ret = radeon_drm_bufmgr_get_handle(_buf, whandle); + if (ret) + whandle->stride = stride; + return ret; +} + +static void radeon_set_flush_cb(struct r300_winsys_screen *rws, void (*flush_cb)(void *), void *data) { - winsys->priv->flush_cb = flush_cb; - winsys->priv->flush_data = data; - radeon_cs_space_set_flush(winsys->priv->cs, flush_cb, data); + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + ws->flush_cb = flush_cb; + ws->flush_data = data; + radeon_cs_space_set_flush(ws->cs, flush_cb, data); } -static boolean radeon_add_buffer(struct radeon_winsys* winsys, - struct pipe_buffer* pbuffer, +static boolean radeon_add_buffer(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf, uint32_t rd, uint32_t wd) { - struct radeon_bo* bo = ((struct radeon_pipe_buffer*)pbuffer)->bo; + struct pb_buffer *_buf = radeon_pb_buffer(buf); - radeon_cs_space_add_persistent_bo(winsys->priv->cs, bo, rd, wd); - return TRUE; + return radeon_drm_bufmgr_add_buffer(_buf, rd, wd); } -static boolean radeon_validate(struct radeon_winsys* winsys) +static boolean radeon_validate(struct r300_winsys_screen *rws) { - if (radeon_cs_space_check(winsys->priv->cs) < 0) { + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + if (radeon_cs_space_check(ws->cs) < 0) { return FALSE; } @@ -55,108 +163,175 @@ static boolean radeon_validate(struct radeon_winsys* winsys) return TRUE; } -static boolean radeon_check_cs(struct radeon_winsys* winsys, int size) +static boolean radeon_check_cs(struct r300_winsys_screen *rws, int size) { - struct radeon_cs* cs = winsys->priv->cs; + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + struct radeon_cs *cs = ws->cs; - return radeon_validate(winsys) && cs->cdw + size <= cs->ndw; + return radeon_validate(rws) && cs->cdw + size <= cs->ndw; } -static void radeon_begin_cs(struct radeon_winsys* winsys, +static void radeon_begin_cs(struct r300_winsys_screen *rws, int size, const char* file, const char* function, int line) { - radeon_cs_begin(winsys->priv->cs, size, file, function, line); + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + radeon_cs_begin(ws->cs, size, file, function, line); } -static void radeon_write_cs_dword(struct radeon_winsys* winsys, +static void radeon_write_cs_dword(struct r300_winsys_screen *rws, uint32_t dword) { - radeon_cs_write_dword(winsys->priv->cs, dword); + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + radeon_cs_write_dword(ws->cs, dword); } -static void radeon_write_cs_reloc(struct radeon_winsys* winsys, - struct pipe_buffer* pbuffer, +static void radeon_write_cs_reloc(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf, uint32_t rd, uint32_t wd, uint32_t flags) { - int retval = 0; - struct radeon_pipe_buffer* radeon_buffer = - (struct radeon_pipe_buffer*)pbuffer; - - assert(!radeon_buffer->pb); - - retval = radeon_cs_write_reloc(winsys->priv->cs, radeon_buffer->bo, - rd, wd, flags); - - if (retval) { - debug_printf("radeon: Relocation of %p (%d, %d, %d) failed!\n", - pbuffer, rd, wd, flags); - } + struct pb_buffer *_buf = radeon_pb_buffer(buf); + radeon_drm_bufmgr_write_reloc(_buf, rd, wd, flags); } -static void radeon_reset_bos(struct radeon_winsys *winsys) +static void radeon_reset_bos(struct r300_winsys_screen *rws) { - radeon_cs_space_reset_bos(winsys->priv->cs); + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + radeon_cs_space_reset_bos(ws->cs); } -static void radeon_end_cs(struct radeon_winsys* winsys, +static void radeon_end_cs(struct r300_winsys_screen *rws, const char* file, const char* function, int line) { - radeon_cs_end(winsys->priv->cs, file, function, line); + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); + radeon_cs_end(ws->cs, file, function, line); } -static void radeon_flush_cs(struct radeon_winsys* winsys) +static void radeon_flush_cs(struct r300_winsys_screen *rws) { + struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); int retval; /* Don't flush a zero-sized CS. */ - if (!winsys->priv->cs->cdw) { + if (!ws->cs->cdw) { return; } + radeon_drm_bufmgr_flush_maps(ws->kman); /* Emit the CS. */ - retval = radeon_cs_emit(winsys->priv->cs); + retval = radeon_cs_emit(ws->cs); if (retval) { debug_printf("radeon: Bad CS, dumping...\n"); - radeon_cs_print(winsys->priv->cs, stderr); + radeon_cs_print(ws->cs, stderr); } /* Reset CS. * Someday, when we care about performance, we should really find a way * to rotate between two or three CS objects so that the GPU can be * spinning through one CS while another one is being filled. */ - radeon_cs_erase(winsys->priv->cs); + radeon_cs_erase(ws->cs); } -void -radeon_setup_winsys(int fd, struct radeon_winsys* winsys) +static uint32_t radeon_get_value(struct r300_winsys_screen *rws, + enum r300_value_id id) { - struct radeon_winsys_priv* priv = winsys->priv; + struct radeon_libdrm_winsys *ws = (struct radeon_libdrm_winsys *)rws; - priv->csm = radeon_cs_manager_gem_ctor(fd); + switch(id) { + case R300_VID_PCI_ID: + return ws->pci_id; + case R300_VID_GB_PIPES: + return ws->gb_pipes; + case R300_VID_Z_PIPES: + return ws->z_pipes; + } + return 0; +} + +static void +radeon_winsys_destroy(struct r300_winsys_screen *rws) +{ + struct radeon_libdrm_winsys *ws = (struct radeon_libdrm_winsys *)rws; + radeon_cs_destroy(ws->cs); + + ws->kman->destroy(ws->kman); + ws->mman->destroy(ws->mman); + + radeon_bo_manager_gem_dtor(ws->bom); + radeon_cs_manager_gem_dtor(ws->csm); +} + +boolean +radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) +{ + + ws->csm = radeon_cs_manager_gem_ctor(fd); + if (!ws->csm) + goto fail; + ws->bom = radeon_bo_manager_gem_ctor(fd); + if (!ws->bom) + goto fail; + ws->kman = radeon_drm_bufmgr_create(ws); + if (!ws->kman) + goto fail; + + ws->mman = pb_malloc_bufmgr_create(); + if (!ws->mman) + goto fail; /* Size limit on IBs is 64 kibibytes. */ - priv->cs = radeon_cs_create(priv->csm, 1024 * 64 / 4); - radeon_cs_set_limit(priv->cs, - RADEON_GEM_DOMAIN_GTT, winsys->gart_size); - radeon_cs_set_limit(priv->cs, - RADEON_GEM_DOMAIN_VRAM, winsys->vram_size); - - winsys->add_buffer = radeon_add_buffer; - winsys->validate = radeon_validate; - - winsys->check_cs = radeon_check_cs; - winsys->begin_cs = radeon_begin_cs; - winsys->write_cs_dword = radeon_write_cs_dword; - winsys->write_cs_reloc = radeon_write_cs_reloc; - winsys->end_cs = radeon_end_cs; - winsys->flush_cs = radeon_flush_cs; - winsys->reset_bos = radeon_reset_bos; - winsys->set_flush_cb = radeon_set_flush_cb; + ws->cs = radeon_cs_create(ws->csm, 1024 * 64 / 4); + if (!ws->cs) + goto fail; + radeon_cs_set_limit(ws->cs, + RADEON_GEM_DOMAIN_GTT, ws->gart_size); + radeon_cs_set_limit(ws->cs, + RADEON_GEM_DOMAIN_VRAM, ws->vram_size); + + ws->base.add_buffer = radeon_add_buffer; + ws->base.validate = radeon_validate; + ws->base.destroy = radeon_winsys_destroy; + ws->base.check_cs = radeon_check_cs; + ws->base.begin_cs = radeon_begin_cs; + ws->base.write_cs_dword = radeon_write_cs_dword; + ws->base.write_cs_reloc = radeon_write_cs_reloc; + ws->base.end_cs = radeon_end_cs; + ws->base.flush_cs = radeon_flush_cs; + ws->base.reset_bos = radeon_reset_bos; + ws->base.set_flush_cb = radeon_set_flush_cb; + ws->base.get_value = radeon_get_value; + + ws->base.buffer_create = radeon_r300_winsys_buffer_create; + ws->base.buffer_destroy = radeon_r300_winsys_buffer_destroy; + ws->base.buffer_set_tiling = radeon_r300_winsys_buffer_set_tiling; + ws->base.buffer_map = radeon_r300_winsys_buffer_map; + ws->base.buffer_unmap = radeon_r300_winsys_buffer_unmap; + ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; + ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; + ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; + ws->base.is_buffer_referenced = radeon_r300_winsys_is_buffer_referenced; + return TRUE; + +fail: + if (ws->csm) + radeon_cs_manager_gem_dtor(ws->csm); + + if (ws->bom) + radeon_bo_manager_gem_dtor(ws->bom); + + + if (ws->kman) + ws->kman->destroy(ws->kman); + if (ws->mman) + ws->mman->destroy(ws->mman); + + if (ws->cs) + radeon_cs_destroy(ws->cs); + return FALSE; } diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h index e655dc32c8..2703464ad8 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h @@ -25,6 +25,6 @@ #include "radeon_winsys.h" -void radeon_setup_winsys(int fd, struct radeon_winsys* winsys); +boolean radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* winsys); #endif /* RADEON_R300_H */ diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h index 37eeb45979..16cc701ad6 100644 --- a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h +++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h @@ -30,16 +30,15 @@ #ifndef RADEON_WINSYS_H #define RADEON_WINSYS_H -#include "util/u_simple_screen.h" +#include "r300_winsys.h" -struct radeon_winsys_priv; - -struct radeon_winsys { +struct radeon_libdrm_winsys { /* Parent class. */ - struct pipe_winsys base; + struct r300_winsys_screen base; + + struct pb_manager *kman; - /* Winsys private */ - struct radeon_winsys_priv* priv; + struct pb_manager *mman; /* PCI ID */ uint32_t pci_id; @@ -56,68 +55,27 @@ struct radeon_winsys { /* VRAM size. */ uint32_t vram_size; - /* Create a buffer from a winsys handle. */ - struct pipe_buffer *(*buffer_from_handle)(struct radeon_winsys *winsys, - struct pipe_screen *screen, - struct winsys_handle *whandle, - unsigned *stride); - - /* Get the handle from a buffer. */ - boolean (*buffer_get_handle)(struct radeon_winsys *winsys, - struct pipe_buffer *buffer, - unsigned stride, - struct winsys_handle *whandle); - - /* Add a pipe_buffer to the list of buffer objects to validate. */ - boolean (*add_buffer)(struct radeon_winsys* winsys, - struct pipe_buffer* pbuffer, - uint32_t rd, - uint32_t wd); - - /* Revalidate all currently setup pipe_buffers. - * Returns TRUE if a flush is required. */ - boolean (*validate)(struct radeon_winsys* winsys); - - /* Check to see if there's room for commands. */ - boolean (*check_cs)(struct radeon_winsys* winsys, int size); - - /* Start a command emit. */ - void (*begin_cs)(struct radeon_winsys* winsys, - int size, - const char* file, - const char* function, - int line); - - /* Write a dword to the command buffer. */ - void (*write_cs_dword)(struct radeon_winsys* winsys, uint32_t dword); - - /* Write a relocated dword to the command buffer. */ - void (*write_cs_reloc)(struct radeon_winsys* winsys, - struct pipe_buffer* bo, - uint32_t rd, - uint32_t wd, - uint32_t flags); - - /* Finish a command emit. */ - void (*end_cs)(struct radeon_winsys* winsys, - const char* file, - const char* function, - int line); - - /* Flush the CS. */ - void (*flush_cs)(struct radeon_winsys* winsys); - - /* winsys flush - callback from winsys when flush required */ - void (*set_flush_cb)(struct radeon_winsys *winsys, - void (*flush_cb)(void *), void *data); - - void (*reset_bos)(struct radeon_winsys *winsys); - - void (*buffer_set_tiling)(struct radeon_winsys* winsys, - struct pipe_buffer* buffer, - uint32_t pitch, - boolean microtiled, - boolean macrotiled); + /* DRM FD */ + int fd; + + /* Radeon BO manager. */ + struct radeon_bo_manager *bom; + + /* Radeon CS manager. */ + struct radeon_cs_manager *csm; + + /* Current CS. */ + struct radeon_cs *cs; + + /* Flush CB */ + void (*flush_cb)(void *); + void *flush_data; }; +static INLINE struct radeon_libdrm_winsys * +radeon_winsys_screen(struct r300_winsys_screen *base) +{ + return (struct radeon_libdrm_winsys *)base; +} + #endif diff --git a/src/gallium/winsys/drm/sw/Makefile b/src/gallium/winsys/drm/sw/Makefile new file mode 100644 index 0000000000..5f3c3ec325 --- /dev/null +++ b/src/gallium/winsys/drm/sw/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = swdrm + +C_SOURCES = \ + wrapper_sw_winsys.c \ + sw_drm_api.c + +LIBRARY_INCLUDES = + +LIBRARY_DEFINES = + +include ../../../Makefile.template diff --git a/src/gallium/winsys/drm/sw/sw_drm_api.c b/src/gallium/winsys/drm/sw/sw_drm_api.c new file mode 100644 index 0000000000..0fd2163913 --- /dev/null +++ b/src/gallium/winsys/drm/sw/sw_drm_api.c @@ -0,0 +1,97 @@ +/********************************************************** + * Copyright 2010 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "util/u_memory.h" +#include "softpipe/sp_public.h" +#include "state_tracker/drm_api.h" +#include "wrapper_sw_winsys.h" +#include "sw_drm_api.h" + + +/* + * Defines + */ + + +struct sw_drm_api +{ + struct drm_api base; + struct drm_api *api; + struct sw_winsys *sw; +}; + +static INLINE struct sw_drm_api * +sw_drm_api(struct drm_api *api) +{ + return (struct sw_drm_api *)api; +} + + +/* + * Exported functions + */ + + +static struct pipe_screen * +sw_drm_create_screen(struct drm_api *_api, int drmFD, + struct drm_create_screen_arg *arg) +{ + struct sw_drm_api *swapi = sw_drm_api(_api); + struct drm_api *api = swapi->api; + struct sw_winsys *sww; + struct pipe_screen *screen; + + screen = api->create_screen(api, drmFD, arg); + + sww = wrapper_sw_winsys_warp_pipe_screen(screen); + + return softpipe_create_screen(sww); +} + +static void +sw_drm_destroy(struct drm_api *api) +{ + struct sw_drm_api *swapi = sw_drm_api(api); + if (swapi->api->destroy) + swapi->api->destroy(swapi->api); + + FREE(swapi); +} + +struct drm_api * +sw_drm_api_create(struct drm_api *api) +{ + struct sw_drm_api *swapi = CALLOC_STRUCT(sw_drm_api); + + swapi->base.name = "sw"; + swapi->base.driver_name = api->driver_name; + swapi->base.create_screen = sw_drm_create_screen; + swapi->base.destroy = sw_drm_destroy; + + swapi->api = api; + + return &swapi->base; +} diff --git a/src/gallium/winsys/drm/sw/sw_drm_api.h b/src/gallium/winsys/drm/sw/sw_drm_api.h new file mode 100644 index 0000000000..ce90a04ae0 --- /dev/null +++ b/src/gallium/winsys/drm/sw/sw_drm_api.h @@ -0,0 +1,34 @@ +/********************************************************** + * Copyright 2010 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#ifndef SW_DRM_API_H +#define SW_DRM_API_H + +struct drm_api; + +struct drm_api * sw_drm_api_create(struct drm_api *api); + +#endif diff --git a/src/gallium/winsys/drm/sw/wrapper_sw_winsys.c b/src/gallium/winsys/drm/sw/wrapper_sw_winsys.c new file mode 100644 index 0000000000..459b1c1e2a --- /dev/null +++ b/src/gallium/winsys/drm/sw/wrapper_sw_winsys.c @@ -0,0 +1,282 @@ +/********************************************************** + * Copyright 2010 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#include "wrapper_sw_winsys.h" + +#include "pipe/p_format.h" +#include "pipe/p_state.h" + +#include "state_tracker/sw_winsys.h" + +#include "util/u_memory.h" +#include "util/u_inlines.h" + +/* + * This code wraps a pipe_screen and exposes a sw_winsys interface for use + * with software resterizers. This code is used by the DRM based winsys to + * allow access to the drm driver. + * + * We must borrow the whole stack because only the pipe screen knows how + * to decode the content of a buffer. Or how to create a buffer that + * can still be used by drivers using real hardware (as the case is + * with software st/xorg but hw st/dri). + * + * We also need a pipe context for the transfers. + */ + +struct wrapper_sw_winsys +{ + struct sw_winsys base; + struct pipe_screen *screen; + struct pipe_context *pipe; +}; + +struct wrapper_sw_displaytarget +{ + struct wrapper_sw_winsys *winsys; + struct pipe_texture *tex; + struct pipe_transfer *transfer; + + unsigned width; + unsigned height; + unsigned map_count; + unsigned stride; /**< because we give stride at create */ + void *ptr; +}; + +static INLINE struct wrapper_sw_winsys * +wrapper_sw_winsys(struct sw_winsys *ws) +{ + return (struct wrapper_sw_winsys *)ws; +} + +static INLINE struct wrapper_sw_displaytarget * +wrapper_sw_displaytarget(struct sw_displaytarget *dt) +{ + return (struct wrapper_sw_displaytarget *)dt; +} + + +/* + * Functions + */ + + +static boolean +wsw_dt_get_stride(struct wrapper_sw_displaytarget *wdt, unsigned *stride) +{ + struct pipe_context *pipe = wdt->winsys->pipe; + struct pipe_texture *tex = wdt->tex; + struct pipe_transfer *tr; + + tr = pipe->get_tex_transfer(pipe, tex, 0, 0, 0, + PIPE_TRANSFER_READ_WRITE, + 0, 0, wdt->width, wdt->height); + if (!tr) + return FALSE; + + *stride = tr->stride; + wdt->stride = tr->stride; + + pipe->tex_transfer_destroy(pipe, tr); + + return TRUE; +} + +static struct sw_displaytarget * +wsw_dt_wrap_texture(struct wrapper_sw_winsys *wsw, + struct pipe_texture *tex, unsigned *stride) +{ + struct wrapper_sw_displaytarget *wdt = CALLOC_STRUCT(wrapper_sw_displaytarget); + if (!wdt) + goto err_unref; + + wdt->tex = tex; + wdt->winsys = wsw; + + if (!wsw_dt_get_stride(wdt, stride)) + goto err_free; + + return (struct sw_displaytarget *)wdt; + +err_free: + FREE(wdt); +err_unref: + pipe_texture_reference(&tex, NULL); + return NULL; +} + +static struct sw_displaytarget * +wsw_dt_create(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); + struct pipe_texture templ; + struct pipe_texture *tex; + + /* + * XXX Why don't we just get the template. + */ + memset(&templ, 0, sizeof(templ)); + templ.width0 = width; + templ.height0 = height; + templ.format = format; + templ.tex_usage = tex_usage; + + /* XXX alignment: we can't do anything about this */ + + tex = wsw->screen->texture_create(wsw->screen, &templ); + if (!tex) + return NULL; + + return wsw_dt_wrap_texture(wsw, tex, stride); +} + +static struct sw_displaytarget * +wsw_dt_from_handle(struct sw_winsys *ws, + const struct pipe_texture *templ, + struct winsys_handle *whandle, + unsigned *stride) +{ + struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); + struct pipe_texture *tex; + + tex = wsw->screen->texture_from_handle(wsw->screen, templ, whandle); + if (!tex) + return NULL; + + return wsw_dt_wrap_texture(wsw, tex, stride); +} + +static void * +wsw_dt_map(struct sw_winsys *ws, + struct sw_displaytarget *dt, + unsigned flags) +{ + struct wrapper_sw_displaytarget *wdt = wrapper_sw_displaytarget(dt); + struct pipe_context *pipe = wdt->winsys->pipe; + struct pipe_texture *tex = wdt->tex; + struct pipe_transfer *tr; + void *ptr; + + if (!wdt->map_count) { + + assert(!wdt->transfer); + + tr = pipe->get_tex_transfer(pipe, tex, 0, 0, 0, + PIPE_TRANSFER_READ_WRITE, + 0, 0, wdt->width, wdt->height); + if (!tr) + return NULL; + + ptr = pipe->transfer_map(pipe, tr); + if (!ptr) + goto err; + + wdt->transfer = tr; + wdt->ptr = ptr; + + /* XXX Handle this case */ + assert(tr->stride == wdt->stride); + } + + wdt->map_count++; + + return wdt->ptr; + +err: + pipe->tex_transfer_destroy(pipe, tr); + return NULL; +} + +static void +wsw_dt_unmap(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct wrapper_sw_displaytarget *wdt = wrapper_sw_displaytarget(dt); + struct pipe_context *pipe = wdt->winsys->pipe; + + assert(wdt->transfer); + + wdt->map_count--; + + if (wdt->map_count) + return; + + pipe->transfer_unmap(pipe, wdt->transfer); + pipe->tex_transfer_destroy(pipe, wdt->transfer); + wdt->transfer = NULL; +} + +static void +wsw_dt_destroy(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct wrapper_sw_displaytarget *wdt = wrapper_sw_displaytarget(dt); + + pipe_texture_reference(&wdt->tex, NULL); + + FREE(wdt); +} + +static void +wsw_destroy(struct sw_winsys *ws) +{ + struct wrapper_sw_winsys *wsw = wrapper_sw_winsys(ws); + + wsw->pipe->destroy(wsw->pipe); + wsw->screen->destroy(wsw->screen); + + FREE(wsw); +} + +struct sw_winsys * +wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen) +{ + struct wrapper_sw_winsys *wsw = CALLOC_STRUCT(wrapper_sw_winsys); + + wsw->base.displaytarget_create = wsw_dt_create; + wsw->base.displaytarget_from_handle = wsw_dt_from_handle; + wsw->base.displaytarget_map = wsw_dt_map; + wsw->base.displaytarget_unmap = wsw_dt_unmap; + wsw->base.displaytarget_destroy = wsw_dt_destroy; + wsw->base.destroy = wsw_destroy; + + wsw->screen = screen; + wsw->pipe = screen->context_create(screen, NULL); + if (!wsw->pipe) + goto err; + + return &wsw->base; + +err: + FREE(wsw); + return NULL; +} diff --git a/src/gallium/winsys/drm/sw/wrapper_sw_winsys.h b/src/gallium/winsys/drm/sw/wrapper_sw_winsys.h new file mode 100644 index 0000000000..b5c25a3c50 --- /dev/null +++ b/src/gallium/winsys/drm/sw/wrapper_sw_winsys.h @@ -0,0 +1,35 @@ +/********************************************************** + * Copyright 2010 VMware, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + + +#ifndef WRAPPER_SW_WINSYS +#define WRAPPER_SW_WINSYS + +struct sw_winsys; +struct pipe_screen; + +struct sw_winsys *wrapper_sw_winsys_warp_pipe_screen(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript index 84319f91ff..d26d0cd748 100644 --- a/src/gallium/winsys/drm/vmware/dri/SConscript +++ b/src/gallium/winsys/drm/vmware/dri/SConscript @@ -48,6 +48,7 @@ if env['platform'] == 'linux': svgadrm, svga, mesa, + glsl, gallium, ]) diff --git a/src/gallium/winsys/gdi/gdi_sw_winsys.c b/src/gallium/winsys/gdi/gdi_sw_winsys.c index f5c0b7d56e..4dba4b577b 100644 --- a/src/gallium/winsys/gdi/gdi_sw_winsys.c +++ b/src/gallium/winsys/gdi/gdi_sw_winsys.c @@ -71,6 +71,7 @@ gdi_sw_displaytarget( struct sw_displaytarget *buf ) static boolean gdi_sw_is_displaytarget_format_supported( struct sw_winsys *ws, + unsigned tex_usage, enum pipe_format format ) { switch(format) { @@ -119,6 +120,7 @@ gdi_sw_displaytarget_destroy(struct sw_winsys *winsys, static struct sw_displaytarget * gdi_sw_displaytarget_create(struct sw_winsys *winsys, + unsigned tex_usage, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, @@ -168,6 +170,27 @@ no_gdt: } +static struct sw_displaytarget * +gdi_sw_displaytarget_from_handle(struct sw_winsys *winsys, + const struct pipe_texture *templet, + struct winsys_handle *whandle, + unsigned *stride) +{ + assert(0); + return NULL; +} + + +static boolean +gdi_sw_displaytarget_get_handle(struct sw_winsys *winsys, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) +{ + assert(0); + return FALSE; +} + + void gdi_sw_display( struct sw_winsys *winsys, struct sw_displaytarget *dt, @@ -212,6 +235,8 @@ gdi_create_sw_winsys(void) winsys->destroy = gdi_sw_destroy; winsys->is_displaytarget_format_supported = gdi_sw_is_displaytarget_format_supported; winsys->displaytarget_create = gdi_sw_displaytarget_create; + winsys->displaytarget_from_handle = gdi_sw_displaytarget_from_handle; + winsys->displaytarget_get_handle = gdi_sw_displaytarget_get_handle; winsys->displaytarget_map = gdi_sw_displaytarget_map; winsys->displaytarget_unmap = gdi_sw_displaytarget_unmap; winsys->displaytarget_display = gdi_sw_displaytarget_display; diff --git a/src/gallium/winsys/null/null_sw_winsys.c b/src/gallium/winsys/null/null_sw_winsys.c index d961d34860..5027e57b30 100644 --- a/src/gallium/winsys/null/null_sw_winsys.c +++ b/src/gallium/winsys/null/null_sw_winsys.c @@ -44,6 +44,7 @@ static boolean null_sw_is_displaytarget_format_supported(struct sw_winsys *ws, + unsigned tex_usage, enum pipe_format format ) { return FALSE; @@ -78,6 +79,7 @@ null_sw_displaytarget_destroy(struct sw_winsys *winsys, static struct sw_displaytarget * null_sw_displaytarget_create(struct sw_winsys *winsys, + unsigned tex_usage, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, @@ -87,6 +89,26 @@ null_sw_displaytarget_create(struct sw_winsys *winsys, } +static struct sw_displaytarget * +null_sw_displaytarget_from_handle(struct sw_winsys *winsys, + const struct pipe_texture *templet, + struct winsys_handle *whandle, + unsigned *stride) +{ + return NULL; +} + + +static boolean +null_sw_displaytarget_get_handle(struct sw_winsys *winsys, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) +{ + assert(0); + return FALSE; +} + + static void null_sw_displaytarget_display(struct sw_winsys *winsys, struct sw_displaytarget *dt, @@ -115,6 +137,8 @@ null_sw_create(void) winsys->destroy = null_sw_destroy; winsys->is_displaytarget_format_supported = null_sw_is_displaytarget_format_supported; winsys->displaytarget_create = null_sw_displaytarget_create; + winsys->displaytarget_from_handle = null_sw_displaytarget_from_handle; + winsys->displaytarget_get_handle = null_sw_displaytarget_get_handle; winsys->displaytarget_map = null_sw_displaytarget_map; winsys->displaytarget_unmap = null_sw_displaytarget_unmap; winsys->displaytarget_display = null_sw_displaytarget_display; diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 68542b488d..18357a4112 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -6,7 +6,8 @@ LIBNAME = ws_xlib LIBRARY_INCLUDES = \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary + -I$(TOP)/src/gallium/auxiliary \ + $(X_CFLAGS) C_SOURCES = \ xlib_sw_winsys.c diff --git a/src/gallium/winsys/xlib/xlib_sw_winsys.c b/src/gallium/winsys/xlib/xlib_sw_winsys.c index cecfa4a53d..54789d7a87 100644 --- a/src/gallium/winsys/xlib/xlib_sw_winsys.c +++ b/src/gallium/winsys/xlib/xlib_sw_winsys.c @@ -208,6 +208,7 @@ alloc_ximage(struct xm_displaytarget *xm_dt, static boolean xm_is_displaytarget_format_supported( struct sw_winsys *ws, + unsigned tex_usage, enum pipe_format format ) { /* TODO: check visuals or other sensible thing here */ @@ -358,6 +359,7 @@ xm_displaytarget_display(struct sw_winsys *ws, static struct sw_displaytarget * xm_displaytarget_create(struct sw_winsys *winsys, + unsigned tex_usage, enum pipe_format format, unsigned width, unsigned height, unsigned alignment, @@ -406,6 +408,27 @@ no_xm_dt: } +static struct sw_displaytarget * +xm_displaytarget_from_handle(struct sw_winsys *winsys, + const struct pipe_texture *templet, + struct winsys_handle *whandle, + unsigned *stride) +{ + assert(0); + return NULL; +} + + +static boolean +xm_displaytarget_get_handle(struct sw_winsys *winsys, + struct sw_displaytarget *dt, + struct winsys_handle *whandle) +{ + assert(0); + return FALSE; +} + + static void xm_destroy( struct sw_winsys *ws ) { @@ -428,6 +451,8 @@ xlib_create_sw_winsys( Display *display ) ws->base.is_displaytarget_format_supported = xm_is_displaytarget_format_supported; ws->base.displaytarget_create = xm_displaytarget_create; + ws->base.displaytarget_from_handle = xm_displaytarget_from_handle; + ws->base.displaytarget_get_handle = xm_displaytarget_get_handle; ws->base.displaytarget_map = xm_displaytarget_map; ws->base.displaytarget_unmap = xm_displaytarget_unmap; ws->base.displaytarget_destroy = xm_displaytarget_destroy; diff --git a/src/glu/sgi/libnurbs/nurbtess/partitionY.h b/src/glu/sgi/libnurbs/nurbtess/partitionY.h index 8dda409de1..5570c183d7 100644 --- a/src/glu/sgi/libnurbs/nurbtess/partitionY.h +++ b/src/glu/sgi/libnurbs/nurbtess/partitionY.h @@ -39,7 +39,7 @@ *or both at or below v. In addition, at least one of the ajacent verteces is *strictly below or above v. * A vertex is a relex vertex if the internals angle is strictly greater than - *180. In other words, if the the signed area is negative: + *180. In other words, if the signed area is negative: *(x1, y1), (x2, y2), (x3, y3) are the three vertices along a polygon, the *order is such that left hand side is inside the polygon. Then (x2,y2) is *reflex if: diff --git a/src/glu/sgi/libtess/normal.c b/src/glu/sgi/libtess/normal.c index 0a2494be34..7ab83167bb 100644 --- a/src/glu/sgi/libtess/normal.c +++ b/src/glu/sgi/libtess/normal.c @@ -142,7 +142,7 @@ static void CheckOrientation( GLUtesselator *tess ) GLUhalfEdge *e; /* When we compute the normal automatically, we choose the orientation - * so that the the sum of the signed areas of all contours is non-negative. + * so that the sum of the signed areas of all contours is non-negative. */ area = 0; for( f = fHead->next; f != fHead; f = f->next ) { diff --git a/src/glx/single2.c b/src/glx/single2.c index 9ecf589fff..a1461956b9 100644 --- a/src/glx/single2.c +++ b/src/glx/single2.c @@ -44,6 +44,9 @@ #include <X11/Xlib-xcb.h> #endif /* USE_XCB */ +#if !defined(__GNUC__) +# define __builtin_expect(x, y) x +#endif /* Used for GL_ARB_transpose_matrix */ static void diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 4db2c215d0..c85085434e 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -251,6 +251,7 @@ if env['platform'] != 'winddk': 'glapi/glapi.c', 'glapi/glapi_dispatch.c', 'glapi/glapi_entrypoint.c', + 'glapi/glapi_execmem.c', 'glapi/glapi_getproc.c', 'glapi/glapi_nop.c', 'glapi/glthread.c', diff --git a/src/mesa/drivers/dri/common/dri_sw.c b/src/mesa/drivers/dri/common/dri_sw.c new file mode 100644 index 0000000000..b7f9036f47 --- /dev/null +++ b/src/mesa/drivers/dri/common/dri_sw.c @@ -0,0 +1,267 @@ +/* + * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2010 George Sapountzis <gsapountzis@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file dri_sw.c + * + * DRISW utility functions, i.e. dri_util.c stripped from drm-specific bits. + */ + +#include "dri_sw.h" +#include "utils.h" + + +/** + * Screen functions + */ + +static void +setupLoaderExtensions(__DRIscreen *psp, + const __DRIextension **extensions) +{ + int i; + + for (i = 0; extensions[i]; i++) { + if (strcmp(extensions[i]->name, __DRI_SWRAST_LOADER) == 0) + psp->swrast_loader = (__DRIswrastLoaderExtension *) extensions[i]; + } +} + +static __DRIscreen * +driCreateNewScreen(int scrn, const __DRIextension **extensions, + const __DRIconfig ***driver_configs, void *data) +{ + static const __DRIextension *emptyExtensionList[] = { NULL }; + __DRIscreen *psp; + + (void) data; + + psp = CALLOC_STRUCT(__DRIscreenRec); + if (!psp) + return NULL; + + setupLoaderExtensions(psp, extensions); + + psp->extensions = emptyExtensionList; + psp->myNum = scrn; + + *driver_configs = driDriverAPI.InitScreen(psp); + + if (*driver_configs == NULL) { + FREE(psp); + return NULL; + } + + return psp; +} + +static void driDestroyScreen(__DRIscreen *psp) +{ + if (psp) { + driDriverAPI.DestroyScreen(psp); + + FREE(psp); + } +} + +static const __DRIextension **driGetExtensions(__DRIscreen *psp) +{ + return psp->extensions; +} + + +/** + * Context functions + */ + +static __DRIcontext * +driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config, + __DRIcontext *shared, void *data) +{ + __DRIcontext *pcp; + void * const shareCtx = (shared != NULL) ? shared->driverPrivate : NULL; + + pcp = CALLOC_STRUCT(__DRIcontextRec); + if (!pcp) + return NULL; + + pcp->loaderPrivate = data; + + pcp->driScreenPriv = psp; + pcp->driDrawablePriv = NULL; + pcp->driReadablePriv = NULL; + + if (!driDriverAPI.CreateContext(&config->modes, pcp, shareCtx)) { + FREE(pcp); + return NULL; + } + + return pcp; +} + +static void +driDestroyContext(__DRIcontext *pcp) +{ + if (pcp) { + driDriverAPI.DestroyContext(pcp); + FREE(pcp); + } +} + +static int +driCopyContext(__DRIcontext *dst, __DRIcontext *src, unsigned long mask) +{ + return GL_FALSE; +} + +static void dri_get_drawable(__DRIdrawable *pdp); +static void dri_put_drawable(__DRIdrawable *pdp); + +static int driBindContext(__DRIcontext *pcp, + __DRIdrawable *pdp, + __DRIdrawable *prp) +{ + /* Bind the drawable to the context */ + if (pcp) { + pcp->driDrawablePriv = pdp; + pcp->driReadablePriv = prp; + if (pdp) { + dri_get_drawable(pdp); + } + if ( prp && pdp != prp ) { + dri_get_drawable(prp); + } + } + + return driDriverAPI.MakeCurrent(pcp, pdp, prp); +} + +static int driUnbindContext(__DRIcontext *pcp) +{ + __DRIdrawable *pdp; + __DRIdrawable *prp; + + if (pcp == NULL) + return GL_FALSE; + + pdp = pcp->driDrawablePriv; + prp = pcp->driReadablePriv; + + /* already unbound */ + if (!pdp && !prp) + return GL_TRUE; + + driDriverAPI.UnbindContext(pcp); + + dri_put_drawable(pdp); + + if (prp != pdp) { + dri_put_drawable(prp); + } + + pcp->driDrawablePriv = NULL; + pcp->driReadablePriv = NULL; + + return GL_TRUE; +} + + +/** + * Drawable functions + */ + +static void dri_get_drawable(__DRIdrawable *pdp) +{ + pdp->refcount++; +} + +static void dri_put_drawable(__DRIdrawable *pdp) +{ + if (pdp) { + pdp->refcount--; + if (pdp->refcount) + return; + + driDriverAPI.DestroyBuffer(pdp); + + FREE(pdp); + } +} + +static __DRIdrawable * +driCreateNewDrawable(__DRIscreen *psp, + const __DRIconfig *config, void *data) +{ + __DRIdrawable *pdp; + + pdp = CALLOC_STRUCT(__DRIdrawableRec); + if (!pdp) + return NULL; + + pdp->loaderPrivate = data; + + pdp->driScreenPriv = psp; + + dri_get_drawable(pdp); + + if (!driDriverAPI.CreateBuffer(psp, pdp, &config->modes, GL_FALSE)) { + FREE(pdp); + return NULL; + } + + return pdp; +} + +static void +driDestroyDrawable(__DRIdrawable *pdp) +{ + dri_put_drawable(pdp); +} + +static void driSwapBuffers(__DRIdrawable *pdp) +{ + driDriverAPI.SwapBuffers(pdp); +} + +const __DRIcoreExtension driCoreExtension = { + { __DRI_CORE, __DRI_CORE_VERSION }, + NULL, /* driCreateNewScreen */ + driDestroyScreen, + driGetExtensions, + driGetConfigAttrib, + driIndexConfigAttrib, + NULL, /* driCreateNewDrawable */ + driDestroyDrawable, + driSwapBuffers, + driCreateNewContext, + driCopyContext, + driDestroyContext, + driBindContext, + driUnbindContext +}; + +const __DRIswrastExtension driSWRastExtension = { + { __DRI_SWRAST, __DRI_SWRAST_VERSION }, + driCreateNewScreen, + driCreateNewDrawable +}; diff --git a/src/mesa/drivers/dri/common/dri_sw.h b/src/mesa/drivers/dri/common/dri_sw.h new file mode 100644 index 0000000000..e353e26b34 --- /dev/null +++ b/src/mesa/drivers/dri/common/dri_sw.h @@ -0,0 +1,112 @@ +/* + * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2010 George Sapountzis <gsapountzis@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _DRI_SW_H +#define _DRI_SW_H + +#include <GL/gl.h> +#include <GL/internal/glcore.h> +#include <GL/internal/dri_interface.h> + + +/** + * Extensions + */ +extern const __DRIcoreExtension driCoreExtension; +extern const __DRIswrastExtension driSWRastExtension; + + +/** + * Data types + */ +struct __DRIscreenRec { + int myNum; + + int fd; + + void *private; + + const __DRIextension **extensions; + + const __DRIswrastLoaderExtension *swrast_loader; +}; + +struct __DRIcontextRec { + + void *driverPrivate; + + void *loaderPrivate; + + __DRIdrawable *driDrawablePriv; + + __DRIdrawable *driReadablePriv; + + __DRIscreen *driScreenPriv; +}; + +struct __DRIdrawableRec { + + void *driverPrivate; + + void *loaderPrivate; + + __DRIscreen *driScreenPriv; + + int refcount; +}; + + +/** + * Driver callback functions + */ +struct __DriverAPIRec { + const __DRIconfig **(*InitScreen) (__DRIscreen * priv); + + void (*DestroyScreen)(__DRIscreen *driScrnPriv); + + GLboolean (*CreateContext)(const __GLcontextModes *glVis, + __DRIcontext *driContextPriv, + void *sharedContextPrivate); + + void (*DestroyContext)(__DRIcontext *driContextPriv); + + GLboolean (*CreateBuffer)(__DRIscreen *driScrnPriv, + __DRIdrawable *driDrawPriv, + const __GLcontextModes *glVis, + GLboolean pixmapBuffer); + + void (*DestroyBuffer)(__DRIdrawable *driDrawPriv); + + void (*SwapBuffers)(__DRIdrawable *driDrawPriv); + + GLboolean (*MakeCurrent)(__DRIcontext *driContextPriv, + __DRIdrawable *driDrawPriv, + __DRIdrawable *driReadPriv); + + GLboolean (*UnbindContext)(__DRIcontext *driContextPriv); +}; + +extern const struct __DriverAPIRec driDriverAPI; + +#endif /* _DRI_SW_H */ diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 890ae51339..badbb5ff82 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -47,28 +47,6 @@ const __DRIextension driReadDrawableExtension = { __DRI_READ_DRAWABLE, __DRI_READ_DRAWABLE_VERSION }; -/** - * Print message to \c stderr if the \c LIBGL_DEBUG environment variable - * is set. - * - * Is called from the drivers. - * - * \param f \c printf like format string. - */ -void -__driUtilMessage(const char *f, ...) -{ - va_list args; - - if (getenv("LIBGL_DEBUG")) { - fprintf(stderr, "libGL: "); - va_start(args, f); - vfprintf(stderr, f, args); - va_end(args); - fprintf(stderr, "\n"); - } -} - GLint driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 ) { @@ -698,7 +676,7 @@ setupLoaderExtensions(__DRIscreen *psp, * \param drm_version Version of the kernel DRM. * \param frame_buffer Data describing the location and layout of the * framebuffer. - * \param pSAREA Pointer the the SAREA. + * \param pSAREA Pointer to the SAREA. * \param fd Device handle for the DRM. * \param extensions ?? * \param driver_modes Returns modes suppoted by the driver diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index 99c0f1e442..f63583cebc 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -552,10 +552,6 @@ struct __DRIscreenRec { }; extern void -__driUtilMessage(const char *f, ...); - - -extern void __driUtilUpdateDrawableInfo(__DRIdrawable *pdp); extern float diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index b85b364c57..0dd879abc9 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -37,6 +37,29 @@ #include "utils.h" +/** + * Print message to \c stderr if the \c LIBGL_DEBUG environment variable + * is set. + * + * Is called from the drivers. + * + * \param f \c printf like format string. + */ +void +__driUtilMessage(const char *f, ...) +{ + va_list args; + + if (getenv("LIBGL_DEBUG")) { + fprintf(stderr, "libGL: "); + va_start(args, f); + vfprintf(stderr, f, args); + va_end(args); + fprintf(stderr, "\n"); + } +} + + unsigned driParseDebugString( const char * debug, const struct dri_debug_control * control ) @@ -230,9 +253,6 @@ void driInitSingleExtension( GLcontext * ctx, /** * Utility function used by drivers to test the verions of other components. * - * If one of the version requirements is not met, a message is logged using - * \c __driUtilMessage. - * * \param driver_name Name of the driver. Used in error messages. * \param driActual Actual DRI version supplied __driCreateNewScreen. * \param driExpected Minimum DRI version required by the driver. @@ -244,7 +264,7 @@ void driInitSingleExtension( GLcontext * ctx, * \returns \c GL_TRUE if all version requirements are met. Otherwise, * \c GL_FALSE is returned. * - * \sa __driCreateNewScreen, driCheckDriDdxDrmVersions2, __driUtilMessage + * \sa __driCreateNewScreen, driCheckDriDdxDrmVersions2 * * \todo * Now that the old \c driCheckDriDdxDrmVersions function is gone, this diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h index 02ca3feb73..de6070c398 100644 --- a/src/mesa/drivers/dri/common/utils.h +++ b/src/mesa/drivers/dri/common/utils.h @@ -69,6 +69,9 @@ struct __DRIutilversionRec2 { int patch; /**< Patch-level. */ }; +extern void +__driUtilMessage(const char *f, ...); + extern unsigned driParseDebugString( const char * debug, const struct dri_debug_control * control ); diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 477259ea7e..de4500a39b 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -36,7 +36,7 @@ #include <unistd.h> #include <errno.h> #include "main/imports.h" -#include "dri_util.h" +#include "utils.h" #include "xmlconfig.h" #undef GET_PROGRAM_NAME diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index a242580273..842d4b7aa1 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -54,6 +54,7 @@ DRIVER_SOURCES = \ brw_gs_emit.c \ brw_gs_state.c \ brw_misc_state.c \ + brw_optimize.c \ brw_program.c \ brw_queryobj.c \ brw_sf.c \ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 8e9117b644..4f55158e8f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -966,4 +966,9 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); + + +/* brw_optimize.c */ +void brw_optimize(struct brw_compile *p); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c new file mode 100644 index 0000000000..57df9ea115 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -0,0 +1,115 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/macros.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + +static GLboolean +is_single_channel_dp4(struct brw_instruction *insn) +{ + if (insn->header.opcode != BRW_OPCODE_DP4 || + insn->header.execution_size != BRW_EXECUTE_8 || + insn->header.access_mode != BRW_ALIGN_16 || + insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + if (!is_power_of_two(insn->bits1.da16.dest_writemask)) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Sets the dependency control fields on DP4 instructions. + * + * The hardware only tracks dependencies on a register basis, so when + * you do: + * + * DP4 dst.x src1 src2 + * DP4 dst.y src1 src3 + * DP4 dst.z src1 src4 + * DP4 dst.w src1 src5 + * + * It will wait to do the DP4 dst.y until the dst.x is resolved, etc. + * We can examine our instruction stream and set the dependency + * control fields to tell the hardware when to do it. + * + * We may want to extend this to other instructions that are used to + * fill in a channel at a time of the destination register. + */ +static void +brw_set_dp4_dependency_control(struct brw_compile *p) +{ + int i; + + for (i = 1; i < p->nr_insn; i++) { + struct brw_instruction *insn = &p->store[i]; + struct brw_instruction *prev = &p->store[i - 1]; + + if (!is_single_channel_dp4(prev)) + continue; + + if (!is_single_channel_dp4(insn)) { + i++; + continue; + } + + /* Only avoid hw dep control if the write masks are different + * channels of one reg. + */ + if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask) + continue; + if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr) + continue; + + /* Check if the second instruction depends on the previous one + * for a src. + */ + if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || + insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT || + insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + + prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED; + insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED; + } +} + +void +brw_optimize(struct brw_compile *p) +{ + brw_set_dp4_dependency_control(p); +} diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a48804a660..d16e916832 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -384,9 +384,8 @@ static void emit_sop( struct brw_vs_compile *c, { struct brw_compile *p = &c->func; - brw_MOV(p, dst, brw_imm_f(0.0f)); - brw_CMP(p, brw_null_reg(), cond, arg0, arg1); - brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_CMP(p, brw_null_reg(), cond, arg1, arg0); + brw_SEL(p, dst, brw_null_reg(), brw_imm_f(1.0f)); brw_set_predicate_control_flag_value(p, 0xff); } @@ -1825,6 +1824,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) post_vs_emit(c, end_inst, last_inst); + brw_optimize(p); + if (INTEL_DEBUG & DEBUG_VS) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 412e09b76a..05e464d4b6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -62,7 +62,13 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) } -/* Payload R0: +/** + * Computes the screen-space x,y position of the pixels. + * + * This will be used by emit_delta_xy() or emit_wpos_xy() for + * interpolation of attributes.. + * + * Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, * corresponding to each of the 16 execution channels. @@ -77,7 +83,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.7 -- ? * R1.8 -- ? */ - void emit_pixel_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask) @@ -117,7 +122,14 @@ void emit_pixel_xy(struct brw_wm_compile *c, brw_pop_insn_state(p); } - +/** + * Computes the screen-space x,y distance of the pixels from the start + * vertex. + * + * This will be used in linterp or pinterp with the start vertex value + * and the Cx, Cy, and C0 coefficients passed in from the setup engine + * to produce interpolated attribute values. + */ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -125,25 +137,27 @@ void emit_delta_xy(struct brw_compile *p, { struct brw_reg r1 = brw_vec1_grf(1, 0); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - } + if (mask == 0) + return; - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); + assert(mask == WRITEMASK_XY); - } + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers produced by emit_pixel_xy(). + */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); } +/** + * Computes the pixel offset from the window origin for gl_FragCoord(). + */ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, @@ -151,9 +165,6 @@ void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { if (c->fp->program.PixelCenterInteger) { /* X' = X */ @@ -534,11 +545,8 @@ void emit_sop(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst[i], brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_CMP(p, brw_null_reg(), cond, arg1[i], arg0[i]); + brw_SEL(p, dst[i], brw_null_reg(), brw_imm_f(1.0)); brw_pop_insn_state(p); } } @@ -633,14 +641,10 @@ void emit_max(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -657,14 +661,10 @@ void emit_min(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); - brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -1118,11 +1118,19 @@ static void emit_kil( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - GLuint i; - - /* XXX - usually won't need 4 compares! - */ + GLuint i, j; + for (i = 0; i < 4; i++) { + /* Check if we've already done the comparison for this reg + * -- common when someone does KIL TEMP.wwww. + */ + for (j = 0; j < i; j++) { + if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) + break; + } + if (j != i) + continue; + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c index 77e7e53ce0..73b1e08d4b 100644 --- a/src/mesa/drivers/dri/mach64/mach64_context.c +++ b/src/mesa/drivers/dri/mach64/mach64_context.c @@ -31,6 +31,7 @@ #include "main/glheader.h" #include "main/context.h" +#include "main/extensions.h" #include "main/simple_list.h" #include "main/imports.h" diff --git a/src/mesa/drivers/dri/r128/r128_tex.c b/src/mesa/drivers/dri/r128/r128_tex.c index 24fbf8f519..4ec4be9a47 100644 --- a/src/mesa/drivers/dri/r128/r128_tex.c +++ b/src/mesa/drivers/dri/r128/r128_tex.c @@ -468,7 +468,7 @@ static void r128TexEnv( GLcontext *ctx, GLenum target, * certain point. It is better than completely ignoring the LOD * bias. Unfortunately there isn't much range in the bias, the * spec mentions strides that vary between 0.5 and 2.0 but these - * numbers don't seem to relate the the GL LOD bias value at all. + * numbers don't seem to relate to the GL LOD bias value at all. */ if ( param[0] >= 1.0 ) { bias = -128; diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 5739443bdf..36a29350cc 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -324,7 +324,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, _mesa_init_driver_functions(&functions); r200InitDriverFuncs(&functions); r200InitIoctlFuncs(&functions); - r200InitStateFuncs(&functions); + r200InitStateFuncs(&rmesa->radeon, &functions); r200InitTextureFuncs(&rmesa->radeon, &functions); r200InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h index 2e52c7c13f..e331be223b 100644 --- a/src/mesa/drivers/dri/r200/r200_reg.h +++ b/src/mesa/drivers/dri/r200/r200_reg.h @@ -690,7 +690,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # define R200_PVS_CNTL_1_PROGRAM_START_SHIFT 0 # define R200_PVS_CNTL_1_POS_END_SHIFT 10 # define R200_PVS_CNTL_1_PROGRAM_END_SHIFT 20 -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R200_VAP_PVS_CNTL_2 0x22d4 # define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 # define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 71f764aaae..9c2ac05ad6 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2488,16 +2488,19 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) } /* Initialize the driver's state functions. */ -void r200InitStateFuncs( struct dd_function_table *functions ) +void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions ) { functions->UpdateState = r200InvalidateState; functions->LightingSpaceChange = r200LightingSpaceChange; functions->DrawBuffer = radeonDrawBuffer; functions->ReadBuffer = radeonReadBuffer; - functions->CopyPixels = _mesa_meta_CopyPixels; - functions->DrawPixels = _mesa_meta_DrawPixels; - functions->ReadPixels = radeonReadPixels; + + if (radeon->radeonScreen->kernel_mm) { + functions->CopyPixels = _mesa_meta_CopyPixels; + functions->DrawPixels = _mesa_meta_DrawPixels; + functions->ReadPixels = radeonReadPixels; + } functions->AlphaFunc = r200AlphaFunc; functions->BlendColor = r200BlendColor; diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index 7b9b0c106a..327ba837e2 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" extern void r200InitState( r200ContextPtr rmesa ); -extern void r200InitStateFuncs( struct dd_function_table *functions ); +extern void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions ); extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index d870c7f852..fa60628a5e 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -582,12 +582,6 @@ unsigned r300_blit(GLcontext *ctx, if (dst_pitch % 2 > 0) ++dst_pitch; - /* Rendering to small buffer doesn't work. - * Looks like a hw limitation. - */ - if (dst_pitch < 32) - return 0; - /* Need to clamp the region size to make sure * we don't read outside of the source buffer * or write outside of the destination buffer. diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 364e0ba6b6..cfeb5407e9 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -500,7 +500,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); - r300InitStateFuncs(&functions); + r300InitStateFuncs(&r300->radeon, &functions); r300InitTextureFuncs(&r300->radeon, &functions); r300InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index d18ebab8ff..ac93563ed9 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -482,7 +482,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_FIRST_INST_SHIFT 0 # define R300_PVS_XYZW_VALID_INST_SHIFT 10 # define R300_PVS_LAST_INST_SHIFT 20 -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 @@ -1760,7 +1760,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. * There are separate enable bits for writing into temporary registers - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). * diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e75c88e101..749a2464e7 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2354,7 +2354,7 @@ static void r300RenderMode(GLcontext * ctx, GLenum mode) /** * Initialize driver's state callback functions */ -void r300InitStateFuncs(struct dd_function_table *functions) +void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { functions->UpdateState = r300InvalidateState; @@ -2396,9 +2396,11 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->DrawBuffer = radeonDrawBuffer; functions->ReadBuffer = radeonReadBuffer; - functions->CopyPixels = _mesa_meta_CopyPixels; - functions->DrawPixels = _mesa_meta_DrawPixels; - functions->ReadPixels = radeonReadPixels; + if (radeon->radeonScreen->kernel_mm) { + functions->CopyPixels = _mesa_meta_CopyPixels; + functions->DrawPixels = _mesa_meta_DrawPixels; + functions->ReadPixels = radeonReadPixels; + } } void r300InitShaderFunctions(r300ContextPtr r300) diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index d46bf9f179..e70f84f4e4 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -55,7 +55,7 @@ void r300UpdateDrawBuffer (GLcontext * ctx); void r300UpdateShaders (r300ContextPtr rmesa); void r300UpdateShaderStates (r300ContextPtr rmesa); void r300InitState (r300ContextPtr r300); -void r300InitStateFuncs (struct dd_function_table *functions); +void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions); void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count); void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten); diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 76d5027649..fddac2f9bd 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -384,7 +384,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, */ _mesa_init_driver_functions(&functions); - r700InitStateFuncs(&functions); + r700InitStateFuncs(&r600->radeon, &functions); r600InitTextureFuncs(&r600->radeon, &functions); r700InitShaderFuncs(&functions); radeonInitQueryObjFunctions(&functions); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 8797f8059a..2953ffd028 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1817,7 +1817,7 @@ void r700InitState(GLcontext * ctx) //------------------- } -void r700InitStateFuncs(struct dd_function_table *functions) //----------------- +void r700InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions) { functions->UpdateState = r700InvalidateState; functions->AlphaFunc = r700AlphaFunc; @@ -1861,9 +1861,10 @@ void r700InitStateFuncs(struct dd_function_table *functions) //----------------- functions->DrawBuffer = radeonDrawBuffer; functions->ReadBuffer = radeonReadBuffer; - functions->CopyPixels = _mesa_meta_CopyPixels; - functions->DrawPixels = _mesa_meta_DrawPixels; - functions->ReadPixels = radeonReadPixels; - + if (radeon->radeonScreen->kernel_mm) { + functions->CopyPixels = _mesa_meta_CopyPixels; + functions->DrawPixels = _mesa_meta_DrawPixels; + functions->ReadPixels = radeonReadPixels; + } } diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 60c6a7f23c..56885e0b15 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -40,7 +40,7 @@ extern void r700UpdateShaderStates(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); extern void r700InitState (GLcontext * ctx); -extern void r700InitStateFuncs (struct dd_function_table *functions); +extern void r700InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions); extern void r700SetScissor(context_t *context); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 07e0adc890..05c65164d6 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -42,7 +42,7 @@ #include "radeon_debug.h" #include "r600_context.h" #include "r600_cmdbuf.h" -#include "shader/programopt.c" +#include "shader/programopt.h" #include "r700_debug.h" #include "r700_vertprog.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 78c5f5dd57..7f5fb99fa4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -553,6 +553,8 @@ static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj, } if (mtCount == 0) { + free(mtSizes); + free(mts); return NULL; } diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c index b180c1d9a5..dadb8002c7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c +++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c @@ -50,22 +50,33 @@ static gl_format gl_format_and_type_to_mesa_format(GLenum format, GLenum type) break; case GL_RGBA: switch (type) { - case GL_UNSIGNED_BYTE: - return MESA_FORMAT_RGBA8888_REV; case GL_FLOAT: return MESA_FORMAT_RGBA_FLOAT32; + case GL_UNSIGNED_SHORT_5_5_5_1: + return MESA_FORMAT_RGBA5551; + case GL_UNSIGNED_INT_8_8_8_8: + return MESA_FORMAT_RGBA8888; + case GL_UNSIGNED_BYTE: + case GL_UNSIGNED_INT_8_8_8_8_REV: + return MESA_FORMAT_RGBA8888_REV; + } + break; + case GL_BGRA: + switch (type) { case GL_UNSIGNED_SHORT_4_4_4_4: - return MESA_FORMAT_ARGB4444; + return MESA_FORMAT_ARGB4444_REV; case GL_UNSIGNED_SHORT_4_4_4_4_REV: return MESA_FORMAT_ARGB4444; case GL_UNSIGNED_SHORT_5_5_5_1: - return MESA_FORMAT_RGBA5551; - case GL_UNSIGNED_SHORT_1_5_5_5_REV: return MESA_FORMAT_ARGB1555_REV; + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return MESA_FORMAT_ARGB1555; case GL_UNSIGNED_INT_8_8_8_8: - return MESA_FORMAT_ARGB8888; - case GL_UNSIGNED_INT_8_8_8_8_REV: return MESA_FORMAT_ARGB8888_REV; + case GL_UNSIGNED_BYTE: + case GL_UNSIGNED_INT_8_8_8_8_REV: + return MESA_FORMAT_ARGB8888; + } break; } diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index ec5612fdd7..0afbc19c12 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -1901,7 +1901,7 @@ void radeonUploadTexMatrix( r100ContextPtr rmesa, So: if we need the q coord in the end (solely determined by the texture target, i.e. 2d / 1d / texrect targets) we swap the third and 4th row. Additionally, if we don't have texgen but 4 tex coords submitted, we swap - column 3 and 4 (for the 2d / 1d / texrect targets) since the the q coord + column 3 and 4 (for the 2d / 1d / texrect targets) since the q coord will get submitted in the "wrong", i.e. 3rd, slot. If an app submits 3 coords for 2d targets, we assume it is saving on vertex size and using the texture matrix to swap the r and q coords around (ut2k3 @@ -2249,9 +2249,11 @@ void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 ) ctx->Driver.DrawBuffer = radeonDrawBuffer; ctx->Driver.ReadBuffer = radeonReadBuffer; - ctx->Driver.CopyPixels = _mesa_meta_CopyPixels; - ctx->Driver.DrawPixels = _mesa_meta_DrawPixels; - ctx->Driver.ReadPixels = radeonReadPixels; + if (dri2) { + ctx->Driver.CopyPixels = _mesa_meta_CopyPixels; + ctx->Driver.DrawPixels = _mesa_meta_DrawPixels; + ctx->Driver.ReadPixels = radeonReadPixels; + } ctx->Driver.AlphaFunc = radeonAlphaFunc; ctx->Driver.BlendEquationSeparate = radeonBlendEquationSeparate; diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index e57d77e7ef..29fd31ac23 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -28,6 +28,7 @@ #include "radeon_common.h" #include "radeon_texture.h" +#include "main/enums.h" #include "main/image.h" #include "main/teximage.h" #include "main/texstate.h" @@ -59,18 +60,27 @@ do_copy_texsubimage(GLcontext *ctx, } if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) { - rrb = radeon_renderbuffer(ctx->ReadBuffer->_DepthBuffer); + if (ctx->ReadBuffer->_DepthBuffer && ctx->ReadBuffer->_DepthBuffer->Wrapped) { + rrb = radeon_renderbuffer(ctx->ReadBuffer->_DepthBuffer->Wrapped); + } else { + rrb = radeon_renderbuffer(ctx->ReadBuffer->_DepthBuffer); + } flip_y = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Type == GL_NONE; } else { rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); flip_y = ctx->ReadBuffer->Attachment[BUFFER_COLOR0].Type == GL_NONE; } + // This is software renderbuffer, fallback to swrast + if (!rrb) { + return GL_FALSE; + } + if (!timg->mt) { radeon_validate_texture_miptree(ctx, &tobj->base); } - assert(rrb && rrb->bo); + assert(rrb->bo); assert(timg->mt); assert(timg->mt->bo); assert(timg->base.Width >= dstx + width); @@ -174,6 +184,10 @@ radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, return; fail: + radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, + "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", + _mesa_lookup_enum_by_nr(internalFormat), border); + _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, width, height, border); } @@ -192,7 +206,8 @@ radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, radeon_tex_obj(texObj), (radeon_texture_image *)texImage, xoffset, yoffset, x, y, width, height)) { - //DEBUG_FALLBACKS + radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, + "Falling back to sw for glCopyTexSubImage2D\n"); _mesa_meta_CopyTexSubImage2D(ctx, target, level, xoffset, yoffset, x, y, width, height); diff --git a/src/mesa/drivers/dri/swrast/Makefile b/src/mesa/drivers/dri/swrast/Makefile index 771169c1ff..cc59eefdb2 100644 --- a/src/mesa/drivers/dri/swrast/Makefile +++ b/src/mesa/drivers/dri/swrast/Makefile @@ -17,7 +17,8 @@ ASM_SOURCES = SWRAST_COMMON_SOURCES = \ ../../common/driverfuncs.c \ - ../common/utils.c + ../common/utils.c \ + ../common/dri_sw.c include ../Makefile.template diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 03c672ecf1..e9ca99a86f 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 George Sapountzis <gsap7@yahoo.gr> + * Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -54,17 +54,9 @@ * Screen and config-related functions */ -static void -setupLoaderExtensions(__DRIscreen *psp, - const __DRIextension **extensions) -{ - int i; - - for (i = 0; extensions[i]; i++) { - if (strcmp(extensions[i]->name, __DRI_SWRAST_LOADER) == 0) - psp->swrast_loader = (__DRIswrastLoaderExtension *) extensions[i]; - } -} +static const __DRIextension *dri_screen_extensions[] = { + NULL +}; static __DRIconfig ** swrastFillInModes(__DRIscreen *psp, @@ -143,26 +135,14 @@ swrastFillInModes(__DRIscreen *psp, return configs; } -static __DRIscreen * -driCreateNewScreen(int scrn, const __DRIextension **extensions, - const __DRIconfig ***driver_configs, void *data) +static const __DRIconfig ** +dri_init_screen(__DRIscreen * psp) { - static const __DRIextension *emptyExtensionList[] = { NULL }; - __DRIscreen *psp; __DRIconfig **configs8, **configs16, **configs24, **configs32; - (void) data; - TRACE; - psp = calloc(1, sizeof(*psp)); - if (!psp) - return NULL; - - setupLoaderExtensions(psp, extensions); - - psp->num = scrn; - psp->extensions = emptyExtensionList; + psp->extensions = dri_screen_extensions; configs8 = swrastFillInModes(psp, 8, 8, 0, 1); configs16 = swrastFillInModes(psp, 16, 16, 0, 1); @@ -171,28 +151,15 @@ driCreateNewScreen(int scrn, const __DRIextension **extensions, configs16 = driConcatConfigs(configs8, configs16); configs24 = driConcatConfigs(configs16, configs24); - *driver_configs = (const __DRIconfig **) - driConcatConfigs(configs24, configs32); - - driInitExtensions( NULL, NULL, GL_FALSE ); - - return psp; -} - -static void driDestroyScreen(__DRIscreen *psp) -{ - TRACE; + configs32 = driConcatConfigs(configs24, configs32); - if (psp) { - free(psp); - } + return (const __DRIconfig **)configs32; } -static const __DRIextension **driGetExtensions(__DRIscreen *psp) +static void +dri_destroy_screen(__DRIscreen * sPriv) { TRACE; - - return psp->extensions; } @@ -336,94 +303,116 @@ swrast_new_renderbuffer(const GLvisual *visual, GLboolean front) return xrb; } -static __DRIdrawable * -driCreateNewDrawable(__DRIscreen *screen, - const __DRIconfig *config, void *data) +static GLboolean +dri_create_buffer(__DRIscreen * sPriv, + __DRIdrawable * dPriv, + const __GLcontextModes * visual, GLboolean isPixmap) { - __DRIdrawable *buf; + struct dri_drawable *drawable = NULL; + GLframebuffer *fb; struct swrast_renderbuffer *frontrb, *backrb; TRACE; - buf = calloc(1, sizeof *buf); - if (!buf) - return NULL; + drawable = CALLOC_STRUCT(dri_drawable); + if (drawable == NULL) + goto drawable_fail; - buf->loaderPrivate = data; + dPriv->driverPrivate = drawable; + drawable->dPriv = dPriv; - buf->driScreenPriv = screen; + drawable->row = malloc(MAX_WIDTH * 4); + if (drawable->row == NULL) + goto drawable_fail; - buf->row = malloc(MAX_WIDTH * 4); + fb = &drawable->Base; /* basic framebuffer setup */ - _mesa_initialize_window_framebuffer(&buf->Base, &config->modes); + _mesa_initialize_window_framebuffer(fb, visual); /* add front renderbuffer */ - frontrb = swrast_new_renderbuffer(&config->modes, GL_TRUE); - _mesa_add_renderbuffer(&buf->Base, BUFFER_FRONT_LEFT, &frontrb->Base); + frontrb = swrast_new_renderbuffer(visual, GL_TRUE); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontrb->Base); /* add back renderbuffer */ - if (config->modes.doubleBufferMode) { - backrb = swrast_new_renderbuffer(&config->modes, GL_FALSE); - _mesa_add_renderbuffer(&buf->Base, BUFFER_BACK_LEFT, &backrb->Base); + if (visual->doubleBufferMode) { + backrb = swrast_new_renderbuffer(visual, GL_FALSE); + _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backrb->Base); } /* add software renderbuffers */ - _mesa_add_soft_renderbuffers(&buf->Base, + _mesa_add_soft_renderbuffers(fb, GL_FALSE, /* color */ - config->modes.haveDepthBuffer, - config->modes.haveStencilBuffer, - config->modes.haveAccumBuffer, + visual->haveDepthBuffer, + visual->haveStencilBuffer, + visual->haveAccumBuffer, GL_FALSE, /* alpha */ GL_FALSE /* aux bufs */); - return buf; + return GL_TRUE; + +drawable_fail: + + if (drawable) + free(drawable->row); + + FREE(drawable); + + return GL_FALSE; } static void -driDestroyDrawable(__DRIdrawable *buf) +dri_destroy_buffer(__DRIdrawable * dPriv) { TRACE; - if (buf) { - struct gl_framebuffer *fb = &buf->Base; + if (dPriv) { + struct dri_drawable *drawable = dri_drawable(dPriv); + GLframebuffer *fb; - free(buf->row); + free(drawable->row); + + fb = &drawable->Base; fb->DeletePending = GL_TRUE; _mesa_reference_framebuffer(&fb, NULL); } } -static void driSwapBuffers(__DRIdrawable *buf) +static void +dri_swap_buffers(__DRIdrawable * dPriv) { - GET_CURRENT_CONTEXT(ctx); + __DRIscreen *sPriv = dPriv->driScreenPriv; - struct swrast_renderbuffer *frontrb = - swrast_renderbuffer(buf->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer); - struct swrast_renderbuffer *backrb = - swrast_renderbuffer(buf->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer); + GET_CURRENT_CONTEXT(ctx); - __DRIscreen *screen = buf->driScreenPriv; + struct dri_drawable *drawable = dri_drawable(dPriv); + GLframebuffer *fb; + struct swrast_renderbuffer *frontrb, *backrb; TRACE; + fb = &drawable->Base; + + frontrb = swrast_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + backrb = swrast_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); + /* check for signle-buffered */ if (backrb == NULL) return; /* check if swapping currently bound buffer */ - if (ctx && ctx->DrawBuffer == &(buf->Base)) { + if (ctx && ctx->DrawBuffer == fb) { /* flush pending rendering */ _mesa_notifySwapBuffers(ctx); } - screen->swrast_loader->putImage(buf, __DRI_SWRAST_IMAGE_OP_SWAP, - 0, 0, - frontrb->Base.Width, - frontrb->Base.Height, - backrb->Base.Data, - buf->loaderPrivate); + sPriv->swrast_loader->putImage(dPriv, __DRI_SWRAST_IMAGE_OP_SWAP, + 0, 0, + frontrb->Base.Width, + frontrb->Base.Height, + backrb->Base.Data, + dPriv->loaderPrivate); } @@ -434,13 +423,13 @@ static void driSwapBuffers(__DRIdrawable *buf) static void get_window_size( GLframebuffer *fb, GLsizei *w, GLsizei *h ) { - __DRIdrawable *buf = swrast_drawable(fb); - __DRIscreen *screen = buf->driScreenPriv; + __DRIdrawable *dPriv = swrast_drawable(fb)->dPriv; + __DRIscreen *sPriv = dPriv->driScreenPriv; int x, y; - screen->swrast_loader->getDrawableInfo(buf, - &x, &y, w, h, - buf->loaderPrivate); + sPriv->swrast_loader->getDrawableInfo(dPriv, + &x, &y, w, h, + dPriv->loaderPrivate); } static void @@ -502,37 +491,40 @@ swrast_init_driver_functions(struct dd_function_table *driver) * Context-related functions. */ -static __DRIcontext * -driCreateNewContext(__DRIscreen *screen, const __DRIconfig *config, - __DRIcontext *shared, void *data) +static GLboolean +dri_create_context(const __GLcontextModes * visual, + __DRIcontext * cPriv, void *sharedContextPrivate) { - __DRIcontext *ctx; - GLcontext *mesaCtx; + struct dri_context *ctx = NULL; + struct dri_context *share = (struct dri_context *)sharedContextPrivate; + GLcontext *mesaCtx = NULL; + GLcontext *sharedCtx = NULL; struct dd_function_table functions; TRACE; - ctx = calloc(1, sizeof *ctx); - if (!ctx) - return NULL; - - ctx->loaderPrivate = data; + ctx = CALLOC_STRUCT(dri_context); + if (ctx == NULL) + goto context_fail; - ctx->driScreenPriv = screen; + cPriv->driverPrivate = ctx; + ctx->cPriv = cPriv; /* build table of device driver functions */ _mesa_init_driver_functions(&functions); swrast_init_driver_functions(&functions); - if (!_mesa_initialize_context(&ctx->Base, &config->modes, - shared ? &shared->Base : NULL, - &functions, (void *) ctx)) { - free(ctx); - return NULL; + if (share) { + sharedCtx = &share->Base; } mesaCtx = &ctx->Base; + /* basic context setup */ + if (!_mesa_initialize_context(mesaCtx, visual, sharedCtx, &functions, (void *) cPriv)) { + goto context_fail; + } + /* do bounds checking to prevent segfaults and server crashes! */ mesaCtx->Const.CheckArrayBounds = GL_TRUE; @@ -558,17 +550,28 @@ driCreateNewContext(__DRIscreen *screen, const __DRIconfig *config, _mesa_meta_init(mesaCtx); - return ctx; + driInitExtensions( mesaCtx, NULL, GL_FALSE ); + + return GL_TRUE; + +context_fail: + + FREE(ctx); + + return GL_FALSE; } static void -driDestroyContext(__DRIcontext *ctx) +dri_destroy_context(__DRIcontext * cPriv) { - GLcontext *mesaCtx; TRACE; - if (ctx) { + if (cPriv) { + struct dri_context *ctx = dri_context(cPriv); + GLcontext *mesaCtx; + mesaCtx = &ctx->Base; + _mesa_meta_free(mesaCtx); _swsetup_DestroyContext( mesaCtx ); _swrast_DestroyContext( mesaCtx ); @@ -578,26 +581,22 @@ driDestroyContext(__DRIcontext *ctx) } } -static int -driCopyContext(__DRIcontext *dst, __DRIcontext *src, unsigned long mask) -{ - TRACE; - - _mesa_copy_context(&src->Base, &dst->Base, mask); - return GL_TRUE; -} - -static int driBindContext(__DRIcontext *ctx, - __DRIdrawable *draw, - __DRIdrawable *read) +static GLboolean +dri_make_current(__DRIcontext * cPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) { GLcontext *mesaCtx; GLframebuffer *mesaDraw; GLframebuffer *mesaRead; TRACE; - if (ctx) { - if (!draw || !read) + if (cPriv) { + struct dri_context *ctx = dri_context(cPriv); + struct dri_drawable *draw = dri_drawable(driDrawPriv); + struct dri_drawable *read = dri_drawable(driReadPriv); + + if (!driDrawPriv || !driReadPriv) return GL_FALSE; mesaCtx = &ctx->Base; @@ -614,7 +613,7 @@ static int driBindContext(__DRIcontext *ctx, _glapi_check_multithread(); swrast_check_and_update_window_size(mesaCtx, mesaDraw); - if (read != draw) + if (mesaRead != mesaDraw) swrast_check_and_update_window_size(mesaCtx, mesaRead); _mesa_make_current( mesaCtx, @@ -629,35 +628,25 @@ static int driBindContext(__DRIcontext *ctx, return GL_TRUE; } -static int driUnbindContext(__DRIcontext *ctx) +static GLboolean +dri_unbind_context(__DRIcontext * cPriv) { TRACE; - (void) ctx; + (void) cPriv; return GL_TRUE; } -static const __DRIcoreExtension driCoreExtension = { - { __DRI_CORE, __DRI_CORE_VERSION }, - NULL, /* driCreateNewScreen */ - driDestroyScreen, - driGetExtensions, - driGetConfigAttrib, - driIndexConfigAttrib, - NULL, /* driCreateNewDrawable */ - driDestroyDrawable, - driSwapBuffers, - driCreateNewContext, - driCopyContext, - driDestroyContext, - driBindContext, - driUnbindContext -}; - -static const __DRIswrastExtension driSWRastExtension = { - { __DRI_SWRAST, __DRI_SWRAST_VERSION }, - driCreateNewScreen, - driCreateNewDrawable +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = dri_init_screen, + .DestroyScreen = dri_destroy_screen, + .CreateContext = dri_create_context, + .DestroyContext = dri_destroy_context, + .CreateBuffer = dri_create_buffer, + .DestroyBuffer = dri_destroy_buffer, + .SwapBuffers = dri_swap_buffers, + .MakeCurrent = dri_make_current, + .UnbindContext = dri_unbind_context, }; /* This is the table of extensions that the loader will dlsym() for. */ diff --git a/src/mesa/drivers/dri/swrast/swrast_priv.h b/src/mesa/drivers/dri/swrast/swrast_priv.h index 4722007f95..77670d89a5 100644 --- a/src/mesa/drivers/dri/swrast/swrast_priv.h +++ b/src/mesa/drivers/dri/swrast/swrast_priv.h @@ -3,6 +3,7 @@ * Version: 7.1 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,11 +23,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* - * Authors: - * George Sapountzis <gsap7@yahoo.gr> - */ - #ifndef _SWRAST_PRIV_H #define _SWRAST_PRIV_H @@ -34,6 +30,7 @@ #include <GL/gl.h> #include <GL/internal/dri_interface.h> #include "main/mtypes.h" +#include "dri_sw.h" /** @@ -58,33 +55,51 @@ /** * Data types */ -struct __DRIscreenRec { - int num; - - const __DRIextension **extensions; +struct dri_context +{ + /* mesa, base class, must be first */ + GLcontext Base; - const __DRIswrastLoaderExtension *swrast_loader; + /* dri */ + __DRIcontext *cPriv; }; -struct __DRIcontextRec { - GLcontext Base; - - void *loaderPrivate; +static INLINE struct dri_context * +dri_context(__DRIcontext * driContextPriv) +{ + return (struct dri_context *)driContextPriv->driverPrivate; +} - __DRIscreen *driScreenPriv; -}; +static INLINE struct dri_context * +swrast_context(GLcontext *ctx) +{ + return (struct dri_context *) ctx; +} -struct __DRIdrawableRec { +struct dri_drawable +{ + /* mesa, base class, must be first */ GLframebuffer Base; - void *loaderPrivate; - - __DRIscreen *driScreenPriv; + /* dri */ + __DRIdrawable *dPriv; /* scratch row for optimized front-buffer rendering */ char *row; }; +static INLINE struct dri_drawable * +dri_drawable(__DRIdrawable * driDrawPriv) +{ + return (struct dri_drawable *)driDrawPriv->driverPrivate; +} + +static INLINE struct dri_drawable * +swrast_drawable(GLframebuffer *fb) +{ + return (struct dri_drawable *) fb; +} + struct swrast_renderbuffer { struct gl_renderbuffer Base; @@ -94,18 +109,6 @@ struct swrast_renderbuffer { GLuint bpp; }; -static INLINE __DRIcontext * -swrast_context(GLcontext *ctx) -{ - return (__DRIcontext *) ctx; -} - -static INLINE __DRIdrawable * -swrast_drawable(GLframebuffer *fb) -{ - return (__DRIdrawable *) fb; -} - static INLINE struct swrast_renderbuffer * swrast_renderbuffer(struct gl_renderbuffer *rb) { diff --git a/src/mesa/drivers/dri/swrast/swrast_span.c b/src/mesa/drivers/dri/swrast/swrast_span.c index 5290dc82b9..c5681e34a9 100644 --- a/src/mesa/drivers/dri/swrast/swrast_span.c +++ b/src/mesa/drivers/dri/swrast/swrast_span.c @@ -3,6 +3,7 @@ * Version: 7.1 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,11 +23,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* - * Authors: - * George Sapountzis <gsap7@yahoo.gr> - */ - #include "swrast_priv.h" #define YFLIP(_xrb, Y) ((_xrb)->Base.Height - (Y) - 1) diff --git a/src/mesa/drivers/dri/swrast/swrast_spantemp.h b/src/mesa/drivers/dri/swrast/swrast_spantemp.h index 879a0c12e7..079726ae4a 100644 --- a/src/mesa/drivers/dri/swrast/swrast_spantemp.h +++ b/src/mesa/drivers/dri/swrast/swrast_spantemp.h @@ -39,8 +39,8 @@ static INLINE void PUT_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p ) { - __DRIcontext *ctx = swrast_context(glCtx); - __DRIdrawable *draw = swrast_drawable(glCtx->DrawBuffer); + __DRIcontext *ctx = swrast_context(glCtx)->cPriv; + __DRIdrawable *draw = swrast_drawable(glCtx->DrawBuffer)->dPriv; __DRIscreen *screen = ctx->driScreenPriv; @@ -53,8 +53,8 @@ PUT_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p ) static INLINE void GET_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p ) { - __DRIcontext *ctx = swrast_context(glCtx); - __DRIdrawable *read = swrast_drawable(glCtx->ReadBuffer); + __DRIcontext *ctx = swrast_context(glCtx)->cPriv; + __DRIdrawable *read = swrast_drawable(glCtx->ReadBuffer)->dPriv; __DRIscreen *screen = ctx->driScreenPriv; @@ -65,8 +65,8 @@ GET_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p ) static INLINE void PUT_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row ) { - __DRIcontext *ctx = swrast_context(glCtx); - __DRIdrawable *draw = swrast_drawable(glCtx->DrawBuffer); + __DRIcontext *ctx = swrast_context(glCtx)->cPriv; + __DRIdrawable *draw = swrast_drawable(glCtx->DrawBuffer)->dPriv; __DRIscreen *screen = ctx->driScreenPriv; @@ -78,8 +78,8 @@ PUT_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row ) static INLINE void GET_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row ) { - __DRIcontext *ctx = swrast_context(glCtx); - __DRIdrawable *read = swrast_drawable(glCtx->ReadBuffer); + __DRIcontext *ctx = swrast_context(glCtx)->cPriv; + __DRIdrawable *read = swrast_drawable(glCtx->ReadBuffer)->dPriv; __DRIscreen *screen = ctx->driScreenPriv; diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h index 98139af833..f63626a970 100644 --- a/src/mesa/drivers/x11/xmesa.h +++ b/src/mesa/drivers/x11/xmesa.h @@ -287,7 +287,7 @@ extern void XMesaCopySubBuffer( XMesaBuffer b, /* - * Return a pointer to the the Pixmap or XImage being used as the back + * Return a pointer to the Pixmap or XImage being used as the back * color buffer of an XMesaBuffer. This function is a way to get "under * the hood" of X/Mesa so one can manipulate the back buffer directly. * Input: b - the XMesaBuffer diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 3ffd7661e3..e0a6908228 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -431,7 +431,7 @@ extern const int xmesa_kernel8[DITH_DY * DITH_DX]; * If pixelformat==PF_HPCR: * * HP Color Recovery dithering (ad@lms.be 30/08/95) - * HP has on it's 8-bit 700-series computers, a feature called + * HP has on its 8-bit 700-series computers, a feature called * 'Color Recovery'. This allows near 24-bit output (so they say). * It is enabled by selecting the 8-bit TrueColor visual AND * corresponding colormap (see tkInitWindow) AND doing some special diff --git a/src/mesa/glapi/glapi_entrypoint.c b/src/mesa/glapi/glapi_entrypoint.c index 5e6e5995f2..c4f43f66a1 100644 --- a/src/mesa/glapi/glapi_entrypoint.c +++ b/src/mesa/glapi/glapi_entrypoint.c @@ -65,7 +65,7 @@ get_entrypoint_address(GLuint functionOffset) #endif -#if defined(PTHREADS) || defined(GLX_USE_TLS) +#if defined(USE_X86_ASM) /** * Perform platform-specific GL API entry-point fixups. @@ -73,7 +73,7 @@ get_entrypoint_address(GLuint functionOffset) static void init_glapi_relocs( void ) { -#if defined(USE_X86_ASM) && defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT) +#if defined(GLX_USE_TLS) && !defined(GLX_X86_READONLY_TEXT) extern unsigned long _x86_get_dispatch(void); char run_time_patch[] = { 0x65, 0xa1, 0, 0, 0, 0 /* movl %gs:0,%eax */ @@ -88,8 +88,63 @@ init_glapi_relocs( void ) curr_func += DISPATCH_FUNCTION_SIZE; } #endif -#ifdef USE_SPARC_ASM - extern void __glapi_sparc_icache_flush(unsigned int *); +} + + +/** + * Generate a dispatch function (entrypoint) which jumps through + * the given slot number (offset) in the current dispatch table. + * We need assembly language in order to accomplish this. + */ +_glapi_proc +generate_entrypoint(GLuint functionOffset) +{ + /* 32 is chosen as something of a magic offset. For x86, the dispatch + * at offset 32 is the first one where the offset in the + * "jmp OFFSET*4(%eax)" can't be encoded in a single byte. + */ + const GLubyte * const template_func = gl_dispatch_functions_start + + (DISPATCH_FUNCTION_SIZE * 32); + GLubyte * const code = (GLubyte *) _glapi_exec_malloc(DISPATCH_FUNCTION_SIZE); + + + if ( code != NULL ) { + (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE); + fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset ); + } + + return (_glapi_proc) code; +} + + +/** + * This function inserts a new dispatch offset into the assembly language + * stub that was generated with the preceeding function. + */ +void +fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset) +{ + GLubyte * const code = (GLubyte *) entrypoint; + +#if defined(GLX_USE_TLS) + *((unsigned int *)(code + 8)) = 4 * offset; +#elif defined(THREADS) + *((unsigned int *)(code + 11)) = 4 * offset; + *((unsigned int *)(code + 22)) = 4 * offset; +#else + *((unsigned int *)(code + 7)) = 4 * offset; +#endif +} + + +#elif defined(USE_SPARC_ASM) + +extern void __glapi_sparc_icache_flush(unsigned int *); + +static void +init_glapi_relocs( void ) +{ +#if defined(PTHREADS) || defined(GLX_USE_TLS) static const unsigned int template[] = { #ifdef GLX_USE_TLS 0x05000000, /* sethi %hi(_glapi_tls_Dispatch), %g2 */ @@ -155,7 +210,7 @@ init_glapi_relocs( void ) int idx; #endif -#if defined(GLX_USE_TLS) +#ifdef GLX_USE_TLS code[0] = template[0] | (dispatch >> 10); code[1] = template[1]; __glapi_sparc_icache_flush(&code[0]); @@ -215,51 +270,10 @@ init_glapi_relocs( void ) #endif } -void -init_glapi_relocs_once( void ) -{ - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - pthread_once( & once_control, init_glapi_relocs ); -} - -#else - -void -init_glapi_relocs_once( void ) { } - -#endif /* defined(PTHREADS) || defined(GLX_USE_TLS) */ - - -#ifdef USE_SPARC_ASM -extern void __glapi_sparc_icache_flush(unsigned int *); -#endif -/** - * Generate a dispatch function (entrypoint) which jumps through - * the given slot number (offset) in the current dispatch table. - * We need assembly language in order to accomplish this. - */ _glapi_proc generate_entrypoint(GLuint functionOffset) { -#if defined(USE_X86_ASM) - /* 32 is chosen as something of a magic offset. For x86, the dispatch - * at offset 32 is the first one where the offset in the - * "jmp OFFSET*4(%eax)" can't be encoded in a single byte. - */ - const GLubyte * const template_func = gl_dispatch_functions_start - + (DISPATCH_FUNCTION_SIZE * 32); - GLubyte * const code = (GLubyte *) malloc(DISPATCH_FUNCTION_SIZE); - - - if ( code != NULL ) { - (void) memcpy(code, template_func, DISPATCH_FUNCTION_SIZE); - fill_in_entrypoint_offset( (_glapi_proc) code, functionOffset ); - } - - return (_glapi_proc) code; -#elif defined(USE_SPARC_ASM) - #if defined(PTHREADS) || defined(GLX_USE_TLS) static const unsigned int template[] = { 0x07000000, /* sethi %hi(0), %g3 */ @@ -274,7 +288,7 @@ generate_entrypoint(GLuint functionOffset) extern unsigned int __glapi_sparc_pthread_stub; unsigned long call_dest = (unsigned long ) &__glapi_sparc_pthread_stub; #endif - unsigned int *code = (unsigned int *) malloc(sizeof(template)); + unsigned int *code = (unsigned int *) _glapi_exec_malloc(sizeof(template)); if (code) { code[0] = template[0] | (functionOffset & 0x3fffff); code[1] = template[1]; @@ -287,45 +301,52 @@ generate_entrypoint(GLuint functionOffset) } return (_glapi_proc) code; #endif - -#else - (void) functionOffset; - return NULL; -#endif /* USE_*_ASM */ } -/** - * This function inserts a new dispatch offset into the assembly language - * stub that was generated with the preceeding function. - */ void fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset) { -#if defined(USE_X86_ASM) - GLubyte * const code = (GLubyte *) entrypoint; - -#if DISPATCH_FUNCTION_SIZE == 32 - *((unsigned int *)(code + 11)) = 4 * offset; - *((unsigned int *)(code + 22)) = 4 * offset; -#elif DISPATCH_FUNCTION_SIZE == 16 && defined( GLX_USE_TLS ) - *((unsigned int *)(code + 8)) = 4 * offset; -#elif DISPATCH_FUNCTION_SIZE == 16 - *((unsigned int *)(code + 7)) = 4 * offset; -#else -# error Invalid DISPATCH_FUNCTION_SIZE! -#endif - -#elif defined(USE_SPARC_ASM) unsigned int *code = (unsigned int *) entrypoint; + code[0] &= ~0x3fffff; code[0] |= (offset * sizeof(void *)) & 0x3fffff; __glapi_sparc_icache_flush(&code[0]); -#else +} + + +#else /* USE_*_ASM */ +static void +init_glapi_relocs( void ) +{ +} + + +_glapi_proc +generate_entrypoint(GLuint functionOffset) +{ + (void) functionOffset; + return NULL; +} + + +void +fill_in_entrypoint_offset(_glapi_proc entrypoint, GLuint offset) +{ /* an unimplemented architecture */ (void) entrypoint; (void) offset; +} #endif /* USE_*_ASM */ + + +void +init_glapi_relocs_once( void ) +{ +#if defined(PTHREADS) || defined(GLX_USE_TLS) + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + pthread_once( & once_control, init_glapi_relocs ); +#endif } diff --git a/src/mesa/glapi/glapi_execmem.c b/src/mesa/glapi/glapi_execmem.c new file mode 100644 index 0000000000..6a1fac597f --- /dev/null +++ b/src/mesa/glapi/glapi_execmem.c @@ -0,0 +1,127 @@ +/* + * Mesa 3-D graphics library + * Version: 6.5 + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * \file glapi_execmem.c + * + * Function for allocating executable memory for dispatch stubs. + * + * Copied from main/execmem.c and simplified for dispatch stubs. + */ + + +#ifdef HAVE_DIX_CONFIG_H +#include <dix-config.h> +#include "glapi/mesa.h" +#else +#include "main/compiler.h" +#endif + +#include "glapi/glthread.h" + + +#if defined(__linux__) || defined(__OpenBSD__) || defined(_NetBSD__) || defined(__sun) + +#include <unistd.h> +#include <sys/mman.h> + +#ifdef MESA_SELINUX +#include <selinux/selinux.h> +#endif + + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + + +#define EXEC_MAP_SIZE (4*1024) + +_glthread_DECLARE_STATIC_MUTEX(exec_mutex); + +static unsigned int head = 0; + +static unsigned char *exec_mem = NULL; + + +/* + * Dispatch stubs are of fixed size and never freed. Thus, we do not need to + * overlay a heap, we just mmap a page and manage through an index. + */ + +static int +init_map(void) +{ +#ifdef MESA_SELINUX + if (is_selinux_enabled()) { + if (!security_get_boolean_active("allow_execmem") || + !security_get_boolean_pending("allow_execmem")) + return 0; + } +#endif + + if (!exec_mem) + exec_mem = mmap(NULL, EXEC_MAP_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + return (exec_mem != MAP_FAILED); +} + + +void * +_glapi_exec_malloc(unsigned int size) +{ + void *addr = NULL; + + _glthread_LOCK_MUTEX(exec_mutex); + + if (!init_map()) + goto bail; + + /* free space check, assumes no integer overflow */ + if (head + size > EXEC_MAP_SIZE) + goto bail; + + /* allocation, assumes proper addr and size alignement */ + addr = exec_mem + head; + head += size; + +bail: + _glthread_UNLOCK_MUTEX(exec_mutex); + + return addr; +} + + +#else + +void * +_glapi_exec_malloc(unsigned int size) +{ + return malloc(size); +} + + +#endif diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c index 295657875d..c73e8dd3b0 100644 --- a/src/mesa/glapi/glapi_getproc.c +++ b/src/mesa/glapi/glapi_getproc.c @@ -200,12 +200,6 @@ struct _glapi_function { }; -/* - * Number of extension functions which we can dynamically add at runtime. - */ -#define MAX_EXTENSION_FUNCS 300 - - static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS]; static GLuint NumExtEntryPoints = 0; diff --git a/src/mesa/glapi/glapi_priv.h b/src/mesa/glapi/glapi_priv.h index 7cd81ee8dc..0e2de460f2 100644 --- a/src/mesa/glapi/glapi_priv.h +++ b/src/mesa/glapi/glapi_priv.h @@ -28,6 +28,9 @@ #include "glthread.h" + +/* getproc */ + extern void _glapi_check_table_not_null(const struct _glapi_table *table); @@ -36,6 +39,14 @@ extern void _glapi_check_table(const struct _glapi_table *table); +/* execmem */ + +extern void * +_glapi_exec_malloc(GLuint size); + + +/* entrypoint */ + extern void init_glapi_relocs_once(void); @@ -52,15 +63,35 @@ extern _glapi_proc get_entrypoint_address(GLuint functionOffset); -#if defined(USE_X64_64_ASM) && defined(GLX_USE_TLS) -# define DISPATCH_FUNCTION_SIZE 16 -#elif defined(USE_X86_ASM) -# if defined(THREADS) && !defined(GLX_USE_TLS) +/** + * Size (in bytes) of dispatch function (entrypoint). + */ +#if defined(USE_X86_ASM) +# if defined(GLX_USE_TLS) +# define DISPATCH_FUNCTION_SIZE 16 +# elif defined(THREADS) # define DISPATCH_FUNCTION_SIZE 32 # else # define DISPATCH_FUNCTION_SIZE 16 # endif #endif +#if defined(USE_X64_64_ASM) +# if defined(GLX_USE_TLS) +# define DISPATCH_FUNCTION_SIZE 16 +# endif +#endif + + +/** + * Number of extension functions which we can dynamically add at runtime. + * + * Number of extension functions is also subject to the size of backing exec + * mem we allocate. For the common case of dispatch stubs with size 16 bytes, + * the two limits will be hit simultaneously. For larger dispatch function + * sizes, MAX_EXTENSION_FUNCS is effectively reduced. + */ +#define MAX_EXTENSION_FUNCS 256 + #endif diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 197de09b22..7c02faaa53 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -1072,7 +1072,7 @@ struct dd_function_table { * These are the initial values to be installed into dispatch by * mesa. If the T&L driver wants to modify the dispatch table * while installed, it must do so itself. It would be possible for - * the vertexformat to install it's own initial values for these + * the vertexformat to install its own initial values for these * functions, but this way there is an obvious list of what is * expected of the driver. * diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c index 149853f7ac..04acf05e52 100644 --- a/src/mesa/main/texcompress_fxt1.c +++ b/src/mesa/main/texcompress_fxt1.c @@ -476,7 +476,7 @@ fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv, * for each sample color * sort to nearest vector. * - * replace each vector with the centroid of it's matching colors. + * replace each vector with the centroid of its matching colors. * * repeat until RMS doesn't improve. * diff --git a/src/mesa/math/m_debug_util.h b/src/mesa/math/m_debug_util.h index 2e67db8e55..ed11c849ec 100644 --- a/src/mesa/math/m_debug_util.h +++ b/src/mesa/math/m_debug_util.h @@ -61,7 +61,7 @@ extern long counter_overhead; */ extern char *mesa_profile; -/* Modify the the number of tests if you like. +/* Modify the number of tests if you like. * We take the minimum of all results, because every error should be * positive (time used by other processes, task switches etc). * It is assumed that all calculations are done in the cache. diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c index ef8a40fbec..4b33d0bbb3 100644 --- a/src/mesa/math/m_matrix.c +++ b/src/mesa/math/m_matrix.c @@ -889,7 +889,7 @@ _math_matrix_rotate( GLmatrix *mat, * Y-axis to bring the axis vector parallel with the X-axis. The * rotation about the X-axis is then performed. Ry and Rz are * simply the respective inverse transforms to bring the arbitrary - * axis back to it's original orientation. The first transforms + * axis back to its original orientation. The first transforms * Rz' and Ry' are considered inverses, since the data from the * arbitrary axis gives you info on how to get to it, not how * to get away from it, and an inverse must be applied. diff --git a/src/mesa/shader/lex.yy.c b/src/mesa/shader/lex.yy.c index d1af35fedb..a08617ff8d 100644 --- a/src/mesa/shader/lex.yy.c +++ b/src/mesa/shader/lex.yy.c @@ -53,6 +53,7 @@ typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; +#endif /* ! C99 */ /* Limits of integral types. */ #ifndef INT8_MIN @@ -83,8 +84,6 @@ typedef unsigned int flex_uint32_t; #define UINT32_MAX (4294967295U) #endif -#endif /* ! C99 */ - #endif /* ! FLEXINT_H */ #ifdef __cplusplus @@ -158,15 +157,7 @@ typedef void* yyscan_t; /* Size of default input buffer. */ #ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else #define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ #endif /* The state buf must be large enough to hold one state per character in the main buffer. @@ -1161,7 +1152,7 @@ handle_ident(struct asm_parser_state *state, const char *text, YYSTYPE *lval) } while(0); #define YY_EXTRA_TYPE struct asm_parser_state * -#line 1165 "lex.yy.c" +#line 1156 "lex.yy.c" #define INITIAL 0 @@ -1298,12 +1289,7 @@ static int input (yyscan_t yyscanner ); /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else #define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ #endif /* Copy whatever the last rule matched to the standard output. */ @@ -1311,7 +1297,7 @@ static int input (yyscan_t yyscanner ); /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ -#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) +#define ECHO fwrite( yytext, yyleng, 1, yyout ) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, @@ -1322,7 +1308,7 @@ static int input (yyscan_t yyscanner ); if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ { \ int c = '*'; \ - size_t n; \ + unsigned n; \ for ( n = 0; n < max_size && \ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ @@ -1410,7 +1396,7 @@ YY_DECL #line 157 "program_lexer.l" -#line 1414 "lex.yy.c" +#line 1400 "lex.yy.c" yylval = yylval_param; @@ -2212,7 +2198,7 @@ case 142: YY_RULE_SETUP #line 326 "program_lexer.l" { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } YY_BREAK @@ -2224,7 +2210,7 @@ YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP #line 330 "program_lexer.l" { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } YY_BREAK @@ -2232,7 +2218,7 @@ case 144: YY_RULE_SETUP #line 334 "program_lexer.l" { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } YY_BREAK @@ -2240,7 +2226,7 @@ case 145: YY_RULE_SETUP #line 338 "program_lexer.l" { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } YY_BREAK @@ -2474,7 +2460,7 @@ YY_RULE_SETUP #line 481 "program_lexer.l" ECHO; YY_BREAK -#line 2478 "lex.yy.c" +#line 2464 "lex.yy.c" case YY_STATE_EOF(INITIAL): yyterminate(); @@ -3242,8 +3228,8 @@ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner) /** Setup the input buffer state to scan the given bytes. The next call to yylex() will * scan from a @e copy of @a bytes. - * @param yybytes the byte buffer to scan - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * @param bytes the byte buffer to scan + * @param len the number of bytes in the buffer pointed to by @a bytes. * @param yyscanner The scanner object. * @return the newly allocated buffer state object. */ diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index 224350caac..28c797a4ba 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -97,8 +97,8 @@ #define COND_EQ 2 /**< equal to zero */ #define COND_LT 3 /**< less than zero */ #define COND_UN 4 /**< unordered (NaN) */ -#define COND_GE 5 /**< greater then or equal to zero */ -#define COND_LE 6 /**< less then or equal to zero */ +#define COND_GE 5 /**< greater than or equal to zero */ +#define COND_LE 6 /**< less than or equal to zero */ #define COND_NE 7 /**< not equal to zero */ #define COND_TR 8 /**< always true */ #define COND_FL 9 /**< always false */ diff --git a/src/mesa/shader/program_lexer.l b/src/mesa/shader/program_lexer.l index 83bc5089d9..b00765793d 100644 --- a/src/mesa/shader/program_lexer.l +++ b/src/mesa/shader/program_lexer.l @@ -324,19 +324,19 @@ ARRAYSHADOW2D { return_token_or_IDENTIFIER(require_ARB_fp && require return INTEGER; } {num}?{frac}{exp}? { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } {num}"."/[^.] { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } {num}{exp} { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } {num}"."{exp} { - yylval->real = _mesa_strtod(yytext, NULL); + yylval->real = (float) _mesa_strtod(yytext, NULL); return REAL; } diff --git a/src/mesa/shader/program_parser.h b/src/mesa/shader/program_parser.h index 730466c30f..be952d4b9c 100644 --- a/src/mesa/shader/program_parser.h +++ b/src/mesa/shader/program_parser.h @@ -62,7 +62,7 @@ struct asm_symbol { */ unsigned param_binding_swizzle; - /* This is how many entries in the the program_parameter_list we take up + /* This is how many entries in the program_parameter_list we take up * with our state tokens or constants. Note that this is _not_ the same as * the number of param registers we eventually use. */ diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc index 8b7771c284..a25ca55bc4 100644 --- a/src/mesa/shader/slang/library/slang_common_builtin.gc +++ b/src/mesa/shader/slang/library/slang_common_builtin.gc @@ -695,7 +695,7 @@ vec3 normalize(const vec3 v) { // const float s = inversesqrt(dot(v, v)); // __retVal = v * s; -// XXX note, we _could_ use __retVal.w instead of tmp and and save a +// XXX note, we _could_ use __retVal.w instead of tmp and save a // register, but that's actually a compilation error because v is a vec3 // and the .w suffix is illegal. Oh well. float tmp; diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak index 0c16fcb444..d6ae696e39 100644 --- a/src/mesa/sources.mak +++ b/src/mesa/sources.mak @@ -89,6 +89,7 @@ GLAPI_SOURCES = \ glapi/glapi.c \ glapi/glapi_dispatch.c \ glapi/glapi_entrypoint.c \ + glapi/glapi_execmem.c \ glapi/glapi_getproc.c \ glapi/glapi_nop.c \ glapi/glthread.c diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 0b2e3f5381..b446b2079c 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -138,7 +138,6 @@ static void load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *transfer; const GLuint rSize = ctx->PixelMaps.RtoR.Size; const GLuint gSize = ctx->PixelMaps.GtoG.Size; @@ -151,7 +150,7 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) transfer = st_cond_flush_get_tex_transfer(st_context(ctx), pt, 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, texSize, texSize); - dest = (uint *) screen->transfer_map(screen, transfer); + dest = (uint *) pipe->transfer_map(pipe, transfer); /* Pack four 1D maps into a 2D texture: * R map is placed horizontally, indexed by S, in channel 0 @@ -172,8 +171,8 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) } } - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 33e43ddcc4..01aba3e3dd 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -129,7 +129,6 @@ accum_accum(struct st_context *st, GLfloat value, struct st_renderbuffer *color_strb) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *color_trans; size_t stride = acc_strb->stride; GLubyte *data = acc_strb->data; @@ -145,7 +144,7 @@ accum_accum(struct st_context *st, GLfloat value, buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf); + pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf); switch (acc_strb->format) { case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -166,7 +165,7 @@ accum_accum(struct st_context *st, GLfloat value, } free(buf); - screen->tex_transfer_destroy(color_trans); + pipe->tex_transfer_destroy(pipe, color_trans); } @@ -177,7 +176,6 @@ accum_load(struct st_context *st, GLfloat value, struct st_renderbuffer *color_strb) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *color_trans; size_t stride = acc_strb->stride; GLubyte *data = acc_strb->data; @@ -194,7 +192,7 @@ accum_load(struct st_context *st, GLfloat value, buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf); + pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf); switch (acc_strb->format) { case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -215,7 +213,7 @@ accum_load(struct st_context *st, GLfloat value, } free(buf); - screen->tex_transfer_destroy(color_trans); + pipe->tex_transfer_destroy(pipe, color_trans); } @@ -226,7 +224,6 @@ accum_return(GLcontext *ctx, GLfloat value, struct st_renderbuffer *color_strb) { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; const GLubyte *colormask = ctx->Color.ColorMask[0]; enum pipe_transfer_usage usage; struct pipe_transfer *color_trans; @@ -251,7 +248,7 @@ accum_return(GLcontext *ctx, GLfloat value, width, height); if (usage & PIPE_TRANSFER_READ) - pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf); + pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf); switch (acc_strb->format) { case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -280,10 +277,10 @@ accum_return(GLcontext *ctx, GLfloat value, _mesa_problem(NULL, "unexpected format in st_clear_accum_buffer()"); } - pipe_put_tile_rgba(color_trans, 0, 0, width, height, buf); + pipe_put_tile_rgba(pipe, color_trans, 0, 0, width, height, buf); free(buf); - screen->tex_transfer_destroy(color_trans); + pipe->tex_transfer_destroy(pipe, color_trans); } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index f326601c3b..dfd8925edf 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -259,7 +259,6 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, const GLubyte *bitmap) { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *transfer; ubyte *dest; struct pipe_texture *pt; @@ -285,7 +284,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, PIPE_TRANSFER_WRITE, 0, 0, width, height); - dest = screen->transfer_map(screen, transfer); + dest = pipe->transfer_map(pipe, transfer); /* Put image into texture transfer */ memset(dest, 0xff, height * transfer->stride); @@ -295,8 +294,8 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, _mesa_unmap_pbo_source(ctx, unpack); /* Release transfer */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); return pt; } @@ -520,7 +519,6 @@ static void reset_cache(struct st_context *st) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct bitmap_cache *cache = st->bitmap.cache; /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/ @@ -532,7 +530,7 @@ reset_cache(struct st_context *st) cache->ymax = -1000000; if (cache->trans) { - screen->tex_transfer_destroy(cache->trans); + pipe->tex_transfer_destroy(pipe, cache->trans); cache->trans = NULL; } @@ -570,7 +568,6 @@ static void create_cache_trans(struct st_context *st) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct bitmap_cache *cache = st->bitmap.cache; if (cache->trans) @@ -583,7 +580,7 @@ create_cache_trans(struct st_context *st) PIPE_TRANSFER_WRITE, 0, 0, BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT); - cache->buffer = screen->transfer_map(screen, cache->trans); + cache->buffer = pipe->transfer_map(pipe, cache->trans); /* init image to all 0xff */ memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT); @@ -601,7 +598,6 @@ st_flush_bitmap_cache(struct st_context *st) if (st->ctx->DrawBuffer) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; assert(cache->xmin <= cache->xmax); @@ -617,10 +613,10 @@ st_flush_bitmap_cache(struct st_context *st) if (cache->trans) { if (0) print_cache(cache); - screen->transfer_unmap(screen, cache->trans); + pipe->transfer_unmap(pipe, cache->trans); cache->buffer = NULL; - screen->tex_transfer_destroy(cache->trans); + pipe->tex_transfer_destroy(pipe, cache->trans); cache->trans = NULL; } @@ -823,7 +819,6 @@ void st_destroy_bitmap(struct st_context *st) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct bitmap_cache *cache = st->bitmap.cache; @@ -840,8 +835,8 @@ st_destroy_bitmap(struct st_context *st) if (cache) { if (cache->trans) { - screen->transfer_unmap(screen, cache->trans); - screen->tex_transfer_destroy(cache->trans); + pipe->transfer_unmap(pipe, cache->trans); + pipe->tex_transfer_destroy(pipe, cache->trans); } pipe_texture_reference(&st->bitmap.cache->texture, NULL); free(st->bitmap.cache); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 8eb76f4131..c44d0fc3e8 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -349,7 +349,6 @@ make_texture(struct st_context *st, { GLcontext *ctx = st->ctx; struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; gl_format mformat; struct pipe_texture *pt; enum pipe_format pipeFormat; @@ -391,7 +390,7 @@ make_texture(struct st_context *st, width, height); /* map texture transfer */ - dest = screen->transfer_map(screen, transfer); + dest = pipe->transfer_map(pipe, transfer); /* Put image into texture transfer. @@ -411,8 +410,8 @@ make_texture(struct st_context *st, unpack); /* unmap */ - screen->transfer_unmap(screen, transfer); - screen->tex_transfer_destroy(transfer); + pipe->transfer_unmap(pipe, transfer); + pipe->tex_transfer_destroy(pipe, transfer); assert(success); @@ -658,7 +657,6 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *strb; enum pipe_transfer_usage usage; struct pipe_transfer *pt; @@ -692,7 +690,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, usage, x, y, width, height); - stmap = screen->transfer_map(screen, pt); + stmap = pipe->transfer_map(pipe, pt); pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels); assert(pixels); @@ -792,8 +790,8 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, _mesa_unmap_pbo_source(ctx, &clippedUnpack); /* unmap the stencil buffer */ - screen->transfer_unmap(screen, pt); - screen->tex_transfer_destroy(pt); + pipe->transfer_unmap(pipe, pt); + pipe->tex_transfer_destroy(pipe, pt); } @@ -856,7 +854,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, GLint dstx, GLint dsty) { struct st_renderbuffer *rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer); - struct pipe_screen *screen = ctx->st->pipe->screen; + struct pipe_context *pipe = ctx->st->pipe; enum pipe_transfer_usage usage; struct pipe_transfer *ptDraw; ubyte *drawMap; @@ -892,7 +890,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, assert(util_format_get_blockheight(ptDraw->texture->format) == 1); /* map the stencil buffer */ - drawMap = screen->transfer_map(screen, ptDraw); + drawMap = pipe->transfer_map(pipe, ptDraw); /* draw */ /* XXX PixelZoom not handled yet */ @@ -945,8 +943,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, free(buffer); /* unmap the stencil buffer */ - screen->transfer_unmap(screen, ptDraw); - screen->tex_transfer_destroy(ptDraw); + pipe->transfer_unmap(pipe, ptDraw); + pipe->tex_transfer_destroy(pipe, ptDraw); } @@ -1084,8 +1082,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, if (0) { /* debug */ - debug_dump_surface("copypixsrcsurf", psRead); - debug_dump_surface("copypixtemptex", psTex); + debug_dump_surface(pipe, "copypixsrcsurf", psRead); + debug_dump_surface(pipe, "copypixtemptex", psTex); } pipe_surface_reference(&psRead, NULL); @@ -1115,21 +1113,21 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, /* alternate path using get/put_tile() */ GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(ptRead, 0, 0, width, height, buf); - pipe_put_tile_rgba(ptTex, 0, 0, width, height, buf); + pipe_get_tile_rgba(pipe, ptRead, 0, 0, width, height, buf); + pipe_put_tile_rgba(pipe, ptTex, 0, 0, width, height, buf); free(buf); } else { /* GL_DEPTH */ GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); - pipe_get_tile_z(ptRead, 0, 0, width, height, buf); - pipe_put_tile_z(ptTex, 0, 0, width, height, buf); + pipe_get_tile_z(pipe, ptRead, 0, 0, width, height, buf); + pipe_put_tile_z(pipe, ptTex, 0, 0, width, height, buf); free(buf); } - screen->tex_transfer_destroy(ptRead); - screen->tex_transfer_destroy(ptTex); + pipe->tex_transfer_destroy(pipe, ptRead); + pipe->tex_transfer_destroy(pipe, ptTex); } /* draw textured quad */ diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index f0a02ab766..7afb275fe2 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -64,7 +64,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, GLvoid *pixels) { struct gl_framebuffer *fb = ctx->ReadBuffer; - struct pipe_screen *screen = ctx->st->pipe->screen; + struct pipe_context *pipe = ctx->st->pipe; struct st_renderbuffer *strb = st_renderbuffer(fb->_StencilBuffer); struct pipe_transfer *pt; ubyte *stmap; @@ -82,7 +82,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, width, height); /* map the stencil buffer */ - stmap = screen->transfer_map(screen, pt); + stmap = pipe->transfer_map(pipe, pt); /* width should never be > MAX_WIDTH since we did clipping earlier */ ASSERT(width <= MAX_WIDTH); @@ -162,8 +162,8 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, } /* unmap the stencil buffer */ - screen->transfer_unmap(screen, pt); - screen->tex_transfer_destroy(pt); + pipe->transfer_unmap(pipe, pt); + pipe->tex_transfer_destroy(pipe, pt); } @@ -235,7 +235,6 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb, { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *trans; const GLubyte *map; GLubyte *dst; @@ -254,9 +253,9 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb, return GL_FALSE; } - map = screen->transfer_map(screen, trans); + map = pipe->transfer_map(pipe, trans); if (!map) { - screen->tex_transfer_destroy(trans); + pipe->tex_transfer_destroy(pipe, trans); return GL_FALSE; } @@ -318,8 +317,8 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb, ; /* nothing */ } - screen->transfer_unmap(screen, trans); - screen->tex_transfer_destroy(trans); + pipe->transfer_unmap(pipe, trans); + pipe->tex_transfer_destroy(pipe, trans); } return GL_TRUE; @@ -338,7 +337,6 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLvoid *dest) { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; GLfloat temp[MAX_WIDTH][4]; const GLbitfield transferOps = ctx->_ImageTransferState; GLsizei i, j; @@ -446,7 +444,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / ((1 << 24) - 1); - pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0); + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * (ztemp[j] & 0xffffff)); @@ -461,7 +459,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, assert(format == GL_DEPTH_STENCIL_EXT); for (i = 0; i < height; i++) { GLuint *zshort = (GLuint *)dst; - pipe_get_tile_raw(trans, 0, y, width, 1, dst, 0); + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, dst, 0); y += yStep; /* Reverse into 24/8 */ for (j = 0; j < width; j++) { @@ -478,7 +476,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / ((1 << 24) - 1); - pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0); + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ((ztemp[j] >> 8) & 0xffffff)); @@ -492,7 +490,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, /* XXX: unreachable code -- should be before st_read_stencil_pixels */ assert(format == GL_DEPTH_STENCIL_EXT); for (i = 0; i < height; i++) { - pipe_get_tile_raw(trans, 0, y, width, 1, dst, 0); + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, dst, 0); y += yStep; dst += dstStride; } @@ -503,7 +501,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLushort ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffff; - pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0); + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ztemp[j]); @@ -518,7 +516,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffffffff; - pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0); + pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ztemp[j]); @@ -532,7 +530,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, /* RGBA format */ /* Do a row at a time to flip image data vertically */ for (i = 0; i < height; i++) { - pipe_get_tile_rgba(trans, 0, y, width, 1, df); + pipe_get_tile_rgba(pipe, trans, 0, y, width, 1, df); y += yStep; df += dfStride; if (!dfStride) { @@ -544,7 +542,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, } } - screen->tex_transfer_destroy(trans); + pipe->tex_transfer_destroy(pipe, trans); _mesa_unmap_pbo_dest(ctx, &clippedPacking); } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index c849132e74..626e6ad660 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -371,7 +371,8 @@ compress_with_blit(GLcontext * ctx, { const GLuint dstImageOffsets[1] = {0}; struct st_texture_image *stImage = st_texture_image(texImage); - struct pipe_screen *screen = ctx->st->pipe->screen; + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; gl_format mesa_format; struct pipe_texture templ; struct pipe_texture *src_tex; @@ -421,7 +422,7 @@ compress_with_blit(GLcontext * ctx, 0, 0, 0, /* face, level are zero */ PIPE_TRANSFER_WRITE, 0, 0, width, height); /* x, y, w, h */ - map = screen->transfer_map(screen, tex_xfer); + map = pipe->transfer_map(pipe, tex_xfer); _mesa_texstore(ctx, 2, GL_RGBA, mesa_format, map, /* dest ptr */ @@ -433,8 +434,8 @@ compress_with_blit(GLcontext * ctx, pixels, /* source data */ unpack); /* source data packing */ - screen->transfer_unmap(screen, tex_xfer); - screen->tex_transfer_destroy(tex_xfer); + pipe->transfer_unmap(pipe, tex_xfer); + pipe->tex_transfer_destroy(pipe, tex_xfer); /* copy / compress image */ util_blit_pixels_tex(ctx->st->blit, @@ -809,7 +810,8 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - struct pipe_screen *screen = ctx->st->pipe->screen; + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; struct st_texture_image *stImage = st_texture_image(texImage); const GLuint width = texImage->Width; const GLuint height = texImage->Height; @@ -848,7 +850,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level, if (st_equal_formats(stImage->pt->format, format, type)) { /* memcpy */ const uint bytesPerRow = width * util_format_get_blocksize(stImage->pt->format); - ubyte *map = screen->transfer_map(screen, tex_xfer); + ubyte *map = pipe->transfer_map(pipe, tex_xfer); GLuint row; for (row = 0; row < height; row++) { GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width, @@ -856,7 +858,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level, memcpy(dest, map, bytesPerRow); map += tex_xfer->stride; } - screen->transfer_unmap(screen, tex_xfer); + pipe->transfer_unmap(pipe, tex_xfer); } else { /* format translation via floats */ @@ -871,7 +873,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level, debug_printf("%s: fallback format translation\n", __FUNCTION__); /* get float[4] rgba row from surface */ - pipe_get_tile_rgba(tex_xfer, 0, row, width, 1, rgba); + pipe_get_tile_rgba(pipe, tex_xfer, 0, row, width, 1, rgba); _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format, type, dest, &ctx->Pack, transferOps); @@ -1256,7 +1258,6 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, GLsizei width, GLsizei height) { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_transfer *src_trans; GLvoid *texDest; enum pipe_transfer_usage transfer_usage; @@ -1309,11 +1310,11 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, /* To avoid a large temp memory allocation, do copy row by row */ for (row = 0; row < height; row++, srcY += yStep) { uint data[MAX_WIDTH]; - pipe_get_tile_z(src_trans, 0, srcY, width, 1, data); + pipe_get_tile_z(pipe, src_trans, 0, srcY, width, 1, data); if (scaleOrBias) { _mesa_scale_and_bias_depth_uint(ctx, width, data); } - pipe_put_tile_z(stImage->transfer, 0, row, width, 1, data); + pipe_put_tile_z(pipe, stImage->transfer, 0, row, width, 1, data); } } else { @@ -1335,7 +1336,7 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, /* XXX this usually involves a lot of int/float conversion. * try to avoid that someday. */ - pipe_get_tile_rgba(src_trans, 0, 0, width, height, tempSrc); + pipe_get_tile_rgba(pipe, src_trans, 0, 0, width, height, tempSrc); /* Store into texture memory. * Note that this does some special things such as pixel transfer @@ -1363,7 +1364,7 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, } st_texture_image_unmap(ctx->st, stImage); - screen->tex_transfer_destroy(src_trans); + pipe->tex_transfer_destroy(pipe, src_trans); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 8a6e1ed466..7f45e3f548 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -184,7 +184,7 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, /* this is an odd-ball case */ assert(type == GL_UNSIGNED_BYTE); assert(normalized); - return PIPE_FORMAT_A8R8G8B8_UNORM; + return PIPE_FORMAT_B8G8R8A8_UNORM; } if (normalized) { @@ -273,7 +273,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp, } *userSpace = (num_client_arrays == vpv->num_inputs); - /* printf("user space: %d (%d %d)\n", (int) *userSpace,num_client_arrays,vp->num_inputs); */ + /* debug_printf("user space: %s (%d arrays, %d inputs)\n", + (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */ return GL_TRUE; } @@ -293,6 +294,8 @@ get_arrays_bounds(const struct st_vertex_program *vp, const GLubyte *high_addr = NULL; GLuint attr; + /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */ + for (attr = 0; attr < vpv->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; const GLint stride = arrays[mesaAttr]->StrideB; @@ -301,6 +304,9 @@ get_arrays_bounds(const struct st_vertex_program *vp, _mesa_sizeof_type(arrays[mesaAttr]->Type)); const GLubyte *end = start + (max_index * stride) + sz; + /* debug_printf("attr %u: stride %d size %u start %p end %p\n", + attr, stride, sz, start, end); */ + if (attr == 0) { low_addr = start; high_addr = end; @@ -348,7 +354,8 @@ setup_interleaved_attribs(GLcontext *ctx, const GLubyte *low, *high; get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high); - /*printf("buffer range: %p %p %d\n", low, high, high-low);*/ + /* debug_printf("buffer range: %p %p range %d max index %u\n", + low, high, high - low, max_index); */ offset0 = low; if (userSpace) { diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index 1d35e8d657..0a91183f89 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -167,9 +167,7 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf) { GET_CURRENT_CONTEXT(ctx); - static const GLuint invalid_size = 9999999; struct st_renderbuffer *strb; - GLuint width, height, i; /* sanity checks */ assert(ST_SURFACE_FRONT_LEFT == BUFFER_FRONT_LEFT); @@ -183,18 +181,17 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb, strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer); if (!strb) { - if (surfIndex == ST_SURFACE_FRONT_LEFT) { - /* Delayed creation when the window system supplies a fake front buffer */ - struct st_renderbuffer *strb_back - = st_renderbuffer(stfb->Base.Attachment[ST_SURFACE_BACK_LEFT].Renderbuffer); - struct gl_renderbuffer *rb - = st_new_renderbuffer_fb(surf->format, strb_back->Base.NumSamples, FALSE); - _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb); - strb = st_renderbuffer(rb); - } else { - /* fail */ + /* create new renderbuffer for this surface now */ + const GLuint numSamples = stfb->Base.Visual.samples; + struct gl_renderbuffer *rb = + st_new_renderbuffer_fb(surf->format, numSamples, FALSE); + if (!rb) { + /* out of memory */ + _mesa_warning(ctx, "Out of memory allocating renderbuffer"); return; } + _mesa_add_renderbuffer(&stfb->Base, surfIndex, rb); + strb = st_renderbuffer(rb); } /* replace the renderbuffer's surface/texture pointers */ @@ -206,39 +203,16 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb, * But when we do, we need to start setting this dirty bit * to ensure the renderbuffer attachements are up-to-date * via update_framebuffer. + * Core Mesa's state validation will update the parent framebuffer's + * size info, etc. */ ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER; + ctx->NewState |= _NEW_BUFFERS; } /* update renderbuffer's width/height */ strb->Base.Width = surf->width; strb->Base.Height = surf->height; - - /* Try to update the framebuffer's width/height from the renderbuffer - * sizes. Before we start drawing, all the rbs _should_ be the same size. - */ - width = height = invalid_size; - for (i = 0; i < BUFFER_COUNT; i++) { - if (stfb->Base.Attachment[i].Renderbuffer) { - if (width == invalid_size) { - width = stfb->Base.Attachment[i].Renderbuffer->Width; - height = stfb->Base.Attachment[i].Renderbuffer->Height; - } - else if (width != stfb->Base.Attachment[i].Renderbuffer->Width || - height != stfb->Base.Attachment[i].Renderbuffer->Height) { - /* inconsistant renderbuffer sizes, bail out */ - return; - } - } - } - - if (width != invalid_size) { - /* OK, the renderbuffers are of a consistant size, so update the - * parent framebuffer's size. - */ - stfb->Base.Width = width; - stfb->Base.Height = height; - } } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index f67d7b4cb5..b2521433c8 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -106,7 +106,6 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) { struct pipe_context *pipe = ctx->st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_texture *pt = st_get_texobj_texture(texObj); const uint baseLevel = texObj->BaseLevel; const uint lastLevel = pt->last_level; @@ -142,8 +141,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, u_minify(pt->width0, dstLevel), u_minify(pt->height0, dstLevel)); - srcData = (ubyte *) screen->transfer_map(screen, srcTrans); - dstData = (ubyte *) screen->transfer_map(screen, dstTrans); + srcData = (ubyte *) pipe->transfer_map(pipe, srcTrans); + dstData = (ubyte *) pipe->transfer_map(pipe, dstTrans); srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->texture->format); dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->texture->format); @@ -161,11 +160,11 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstData, dstStride); /* stride in texels */ - screen->transfer_unmap(screen, srcTrans); - screen->transfer_unmap(screen, dstTrans); + pipe->transfer_unmap(pipe, srcTrans); + pipe->transfer_unmap(pipe, dstTrans); - screen->tex_transfer_destroy(srcTrans); - screen->tex_transfer_destroy(dstTrans); + pipe->tex_transfer_destroy(pipe, srcTrans); + pipe->tex_transfer_destroy(pipe, dstTrans); } } diff --git a/src/mesa/state_tracker/st_inlines.h b/src/mesa/state_tracker/st_inlines.h index e105870bc7..7fcde7b1a9 100644 --- a/src/mesa/state_tracker/st_inlines.h +++ b/src/mesa/state_tracker/st_inlines.h @@ -53,11 +53,11 @@ st_cond_flush_get_tex_transfer(struct st_context *st, unsigned int x, unsigned int y, unsigned int w, unsigned int h) { - struct pipe_screen *screen = st->pipe->screen; + struct pipe_context *context = st->pipe; st_teximage_flush_before_map(st, pt, face, level, usage); - return screen->get_tex_transfer(screen, pt, face, level, zslice, usage, - x, y, w, h); + return context->get_tex_transfer(context, pt, face, level, zslice, usage, + x, y, w, h); } static INLINE struct pipe_transfer * @@ -70,9 +70,9 @@ st_no_flush_get_tex_transfer(struct st_context *st, unsigned int x, unsigned int y, unsigned int w, unsigned int h) { - struct pipe_screen *screen = st->pipe->screen; + struct pipe_context *context = st->pipe; - return screen->get_tex_transfer(screen, pt, face, level, + return context->get_tex_transfer(context, pt, face, level, zslice, usage, x, y, w, h); } diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 5a45c4358a..10a38befb4 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -192,7 +192,6 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, GLuint x, GLuint y, GLuint w, GLuint h) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct pipe_texture *pt = stImage->pt; DBG("%s \n", __FUNCTION__); @@ -202,7 +201,7 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, usage, x, y, w, h); if (stImage->transfer) - return screen->transfer_map(screen, stImage->transfer); + return pipe->transfer_map(pipe, stImage->transfer); else return NULL; } @@ -212,13 +211,13 @@ void st_texture_image_unmap(struct st_context *st, struct st_texture_image *stImage) { - struct pipe_screen *screen = st->pipe->screen; + struct pipe_context *pipe = st->pipe; DBG("%s\n", __FUNCTION__); - screen->transfer_unmap(screen, stImage->transfer); + pipe->transfer_unmap(pipe, stImage->transfer); - screen->tex_transfer_destroy(stImage->transfer); + pipe->tex_transfer_destroy(pipe, stImage->transfer); } @@ -238,8 +237,7 @@ st_surface_data(struct pipe_context *pipe, const void *src, unsigned src_stride, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - struct pipe_screen *screen = pipe->screen; - void *map = screen->transfer_map(screen, dst); + void *map = pipe->transfer_map(pipe, dst); assert(dst->texture); util_copy_rect(map, @@ -250,7 +248,7 @@ st_surface_data(struct pipe_context *pipe, src, src_stride, srcx, srcy); - screen->transfer_unmap(screen, dst); + pipe->transfer_unmap(pipe, dst); } @@ -265,7 +263,6 @@ st_texture_image_data(struct st_context *st, GLuint src_row_stride, GLuint src_image_stride) { struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; GLuint depth = u_minify(dst->depth0, level); GLuint i; const GLubyte *srcUB = src; @@ -287,7 +284,7 @@ st_texture_image_data(struct st_context *st, u_minify(dst->width0, level), u_minify(dst->height0, level)); /* width, height */ - screen->tex_transfer_destroy(dst_transfer); + pipe->tex_transfer_destroy(pipe, dst_transfer); srcUB += src_image_stride; } diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c index 3e36cf9a7e..ed637cac12 100644 --- a/src/mesa/swrast/s_depth.c +++ b/src/mesa/swrast/s_depth.c @@ -526,7 +526,7 @@ _swrast_depth_clamp_span( GLcontext *ctx, SWspan *span ) /* Convert floating point values in [0,1] to device Z coordinates in * [0, DepthMax]. - * ex: If the the Z buffer has 24 bits, DepthMax = 0xffffff. + * ex: If the Z buffer has 24 bits, DepthMax = 0xffffff. * * XXX this all falls apart if we have 31 or more bits of Z because * the triangle rasterization code produces unsigned Z values. Negative diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c index f253c854d2..3f581ea02d 100644 --- a/src/mesa/vbo/vbo_save_loopback.c +++ b/src/mesa/vbo/vbo_save_loopback.c @@ -78,7 +78,7 @@ struct loopback_attr { }; /* Don't emit ends and begins on wrapped primitives. Don't replay - * wrapped vertices. If we get here, it's probably because the the + * wrapped vertices. If we get here, it's probably because the * precalculated wrapping is wrong. */ static void loopback_prim( GLcontext *ctx, |